Fix imports, types and default configs

This commit is contained in:
Ines Montani 2020-08-07 18:40:54 +02:00
parent b1d83fc13e
commit 3a193eb8f1
14 changed files with 14 additions and 24 deletions

View File

@ -5,6 +5,7 @@ from thinc.types import Floats2d
from ...util import registry from ...util import registry
from .._precomputable_affine import PrecomputableAffine from .._precomputable_affine import PrecomputableAffine
from ..tb_framework import TransitionModel from ..tb_framework import TransitionModel
from ...tokens import Doc
@registry.architectures.register("spacy.TransitionBasedParser.v1") @registry.architectures.register("spacy.TransitionBasedParser.v1")
@ -35,7 +36,7 @@ def build_tb_parser_model(
and applying the non-linearity. and applying the non-linearity.
* upper (optional): A feed-forward network that predicts scores from the * upper (optional): A feed-forward network that predicts scores from the
state representation. If not present, the output from the lower model is state representation. If not present, the output from the lower model is
ued as action scores directly. used as action scores directly.
tok2vec (Model[List[Doc], List[Floats2d]]): tok2vec (Model[List[Doc], List[Floats2d]]):
Subnetwork to map tokens into vector representations. Subnetwork to map tokens into vector representations.

View File

@ -10,7 +10,7 @@ from .._iob import IOB
from ...util import registry from ...util import registry
@registry.architectures.register("spacy.BiluoTagger.v1") @registry.architectures.register("spacy.BILUOTagger.v1")
def BiluoTagger( def BiluoTagger(
tok2vec: Model[List[Doc], List[Floats2d]] tok2vec: Model[List[Doc], List[Floats2d]]
) -> Model[List[Doc], List[Floats2d]]: ) -> Model[List[Doc], List[Floats2d]]:
@ -59,7 +59,7 @@ def IOBTagger(
token and uses greedy decoding with transition-constraints to return a valid token and uses greedy decoding with transition-constraints to return a valid
IOB tag sequence. IOB tag sequence.
A IOB tag sequence encodes a sequence of non-overlapping labelled spans An IOB tag sequence encodes a sequence of non-overlapping labelled spans
into tags assigned to each token. The first token of a span is given the into tags assigned to each token. The first token of a span is given the
tag B-LABEL, and subsequent tokens are given the tag I-LABEL. tag B-LABEL, and subsequent tokens are given the tag I-LABEL.
All other tokens are assigned the tag O. All other tokens are assigned the tag O.

View File

@ -3,7 +3,7 @@ from thinc.api import zero_init, with_array, Softmax, chain, Model
from thinc.types import Floats2d from thinc.types import Floats2d
from ...util import registry from ...util import registry
from ..tokens import Doc from ...tokens import Doc
@registry.architectures.register("spacy.Tagger.v1") @registry.architectures.register("spacy.Tagger.v1")

View File

@ -77,7 +77,7 @@ def build_Tok2Vec_model(
"""Construct a tok2vec model out of embedding and encoding subnetworks. """Construct a tok2vec model out of embedding and encoding subnetworks.
See https://explosion.ai/blog/deep-learning-formula-nlp See https://explosion.ai/blog/deep-learning-formula-nlp
embed (Model[List[Doc], List[Floats2d]]): Embed tokens into context-indepdent embed (Model[List[Doc], List[Floats2d]]): Embed tokens into context-independent
word vector representations. word vector representations.
encode (Model[List[Floats2d], List[Floats2d]]): Encode context into the encode (Model[List[Floats2d], List[Floats2d]]): Encode context into the
embeddings, using an architecture such as a CNN, BiLSTM or transformer. embeddings, using an architecture such as a CNN, BiLSTM or transformer.

View File

@ -27,7 +27,6 @@ embed_size = 2000
window_size = 1 window_size = 1
maxout_pieces = 3 maxout_pieces = 3
subword_features = true subword_features = true
dropout = null
""" """
DEFAULT_PARSER_MODEL = Config().from_str(default_model_config)["model"] DEFAULT_PARSER_MODEL = Config().from_str(default_model_config)["model"]

View File

@ -29,7 +29,6 @@ embed_size = 300
window_size = 1 window_size = 1
maxout_pieces = 3 maxout_pieces = 3
subword_features = true subword_features = true
dropout = null
""" """
DEFAULT_NEL_MODEL = Config().from_str(default_model_config)["model"] DEFAULT_NEL_MODEL = Config().from_str(default_model_config)["model"]

View File

@ -29,7 +29,6 @@ embed_size = 2000
window_size = 1 window_size = 1
maxout_pieces = 2 maxout_pieces = 2
subword_features = true subword_features = true
dropout = null
""" """
DEFAULT_MT_MODEL = Config().from_str(default_model_config)["model"] DEFAULT_MT_MODEL = Config().from_str(default_model_config)["model"]

View File

@ -25,7 +25,6 @@ embed_size = 2000
window_size = 1 window_size = 1
maxout_pieces = 3 maxout_pieces = 3
subword_features = true subword_features = true
dropout = null
""" """
DEFAULT_NER_MODEL = Config().from_str(default_model_config)["model"] DEFAULT_NER_MODEL = Config().from_str(default_model_config)["model"]

View File

@ -25,7 +25,6 @@ embed_size = 2000
window_size = 1 window_size = 1
maxout_pieces = 2 maxout_pieces = 2
subword_features = true subword_features = true
dropout = null
""" """
DEFAULT_SENTER_MODEL = Config().from_str(default_model_config)["model"] DEFAULT_SENTER_MODEL = Config().from_str(default_model_config)["model"]

View File

@ -15,7 +15,7 @@ from .pipe import Pipe
default_model_config = """ default_model_config = """
[model] [model]
@architectures = "spacy.BiluoTagger.v1" @architectures = "spacy.BILUOTagger.v1"
[model.tok2vec] [model.tok2vec]
@architectures = "spacy.HashEmbedCNN.v1" @architectures = "spacy.HashEmbedCNN.v1"
@ -26,7 +26,6 @@ embed_size = 7000
window_size = 1 window_size = 1
maxout_pieces = 3 maxout_pieces = 3
subword_features = true subword_features = true
dropout = null
""" """
DEFAULT_SIMPLE_NER_MODEL = Config().from_str(default_model_config)["model"] DEFAULT_SIMPLE_NER_MODEL = Config().from_str(default_model_config)["model"]

View File

@ -31,7 +31,6 @@ embed_size = 2000
window_size = 1 window_size = 1
maxout_pieces = 3 maxout_pieces = 3
subword_features = true subword_features = true
dropout = null
""" """
DEFAULT_TAGGER_MODEL = Config().from_str(default_model_config)["model"] DEFAULT_TAGGER_MODEL = Config().from_str(default_model_config)["model"]

View File

@ -48,7 +48,6 @@ embed_size = 2000
window_size = 1 window_size = 1
maxout_pieces = 3 maxout_pieces = 3
subword_features = true subword_features = true
dropout = null
""" """

View File

@ -20,7 +20,6 @@ embed_size = 2000
window_size = 1 window_size = 1
maxout_pieces = 3 maxout_pieces = 3
subword_features = true subword_features = true
dropout = null
""" """
DEFAULT_TOK2VEC_MODEL = Config().from_str(default_model_config)["model"] DEFAULT_TOK2VEC_MODEL = Config().from_str(default_model_config)["model"]

View File

@ -48,7 +48,6 @@ window_size = 1
embed_size = 2000 embed_size = 2000
maxout_pieces = 3 maxout_pieces = 3
subword_features = true subword_features = true
dropout = null
[components.tagger] [components.tagger]
factory = "tagger" factory = "tagger"
@ -78,7 +77,6 @@ embed_size = 5555
window_size = 1 window_size = 1
maxout_pieces = 7 maxout_pieces = 7
subword_features = false subword_features = false
dropout = null
""" """