diff --git a/spacy/cli/templates/quickstart_training.jinja b/spacy/cli/templates/quickstart_training.jinja
index ab1d69894..ff190804c 100644
--- a/spacy/cli/templates/quickstart_training.jinja
+++ b/spacy/cli/templates/quickstart_training.jinja
@@ -75,12 +75,11 @@ grad_factor = 1.0
 factory = "parser"
 
 [components.parser.model]
-@architectures = "spacy.TransitionBasedParser.v2"
+@architectures = "spacy.TransitionBasedParser.v3"
 state_type = "parser"
 extra_state_tokens = false
 hidden_width = 128
 maxout_pieces = 3
-use_upper = false
 nO = null
 
 [components.parser.model.tok2vec]
@@ -96,12 +95,11 @@ grad_factor = 1.0
 factory = "ner"
 
 [components.ner.model]
-@architectures = "spacy.TransitionBasedParser.v2"
+@architectures = "spacy.TransitionBasedParser.v3"
 state_type = "ner"
 extra_state_tokens = false
 hidden_width = 64
 maxout_pieces = 2
-use_upper = false
 nO = null
 
 [components.ner.model.tok2vec]
@@ -257,12 +255,11 @@ width = ${components.tok2vec.model.encode.width}
 factory = "parser"
 
 [components.parser.model]
-@architectures = "spacy.TransitionBasedParser.v2"
+@architectures = "spacy.TransitionBasedParser.v3"
 state_type = "parser"
 extra_state_tokens = false
 hidden_width = 128
 maxout_pieces = 3
-use_upper = true
 nO = null
 
 [components.parser.model.tok2vec]
@@ -275,12 +272,11 @@ width = ${components.tok2vec.model.encode.width}
 factory = "ner"
 
 [components.ner.model]
-@architectures = "spacy.TransitionBasedParser.v2"
+@architectures = "spacy.TransitionBasedParser.v3"
 state_type = "ner"
 extra_state_tokens = false
 hidden_width = 64
 maxout_pieces = 2
-use_upper = true
 nO = null
 
 [components.ner.model.tok2vec]
diff --git a/spacy/ml/models/parser.py b/spacy/ml/models/parser.py
index fd476382f..bbc5bf957 100644
--- a/spacy/ml/models/parser.py
+++ b/spacy/ml/models/parser.py
@@ -12,49 +12,8 @@ TransitionSystem = Any  # TODO
 State = Any  # TODO
 
 
-@registry.architectures.register("spacy.TransitionBasedParser.v1")
-def transition_parser_v1(
-    tok2vec: Model[List[Doc], List[Floats2d]],
-    state_type: Literal["parser", "ner"],
-    extra_state_tokens: bool,
-    hidden_width: int,
-    maxout_pieces: int,
-    use_upper: bool = True,
-    nO: Optional[int] = None,
-) -> Model[Tuple[List[Doc], TransitionSystem], List[Tuple[State, List[Floats2d]]]]:
-    return build_tb_parser_model(
-        tok2vec,
-        state_type,
-        extra_state_tokens,
-        hidden_width,
-        maxout_pieces,
-        use_upper,
-        nO,
-    )
-
-
-@registry.architectures.register("spacy.TransitionBasedParser.v2")
-def transition_parser_v2(
-    tok2vec: Model[List[Doc], List[Floats2d]],
-    state_type: Literal["parser", "ner"],
-    extra_state_tokens: bool,
-    hidden_width: int,
-    maxout_pieces: int,
-    use_upper: bool,
-    nO: Optional[int] = None,
-) -> Model:
-    return build_tb_parser_model(
-        tok2vec,
-        state_type,
-        extra_state_tokens,
-        hidden_width,
-        maxout_pieces,
-        nO=nO,
-    )
-
-
 @registry.architectures.register("spacy.TransitionBasedParser.v3")
-def transition_parser_v2(
+def transition_parser_v3(
     tok2vec: Model[List[Doc], List[Floats2d]],
     state_type: Literal["parser", "ner"],
     extra_state_tokens: bool,
@@ -111,14 +70,7 @@ def build_tb_parser_model(
         feature sets (for the NER) or 13 (for the parser).
     hidden_width (int): The width of the hidden layer.
     maxout_pieces (int): How many pieces to use in the state prediction layer.
-        Recommended values are 1, 2 or 3. If 1, the maxout non-linearity
-        is replaced with a ReLu non-linearity if use_upper=True, and no
-        non-linearity if use_upper=False.
-    use_upper (bool): Whether to use an additional hidden layer after the state
-        vector in order to predict the action scores. It is recommended to set
-        this to False for large pretrained models such as transformers, and True
-        for smaller networks. The upper layer is computed on CPU, which becomes
-        a bottleneck on larger GPU-based models, where it's also less necessary.
+        Recommended values are 1, 2 or 3.
     nO (int or None): The number of actions the model will predict between.
         Usually inferred from data at the beginning of training, or loaded from
         disk.
diff --git a/spacy/pipeline/dep_parser.py b/spacy/pipeline/dep_parser.py
index 7bdb2849d..02ae63925 100644
--- a/spacy/pipeline/dep_parser.py
+++ b/spacy/pipeline/dep_parser.py
@@ -16,12 +16,11 @@ from ..training import validate_examples
 
 default_model_config = """
 [model]
-@architectures = "spacy.TransitionBasedParser.v2"
+@architectures = "spacy.TransitionBasedParser.v3"
 state_type = "parser"
 extra_state_tokens = false
 hidden_width = 64
 maxout_pieces = 2
-use_upper = true
 
 [model.tok2vec]
 @architectures = "spacy.HashEmbedCNN.v1"
@@ -62,7 +61,7 @@ def make_parser(
     moves: Optional[list],
     update_with_oracle_cut_size: int,
     learn_tokens: bool,
-    min_action_freq: int
+    min_action_freq: int,
 ):
     """Create a transition-based DependencyParser component. The dependency parser
     jointly learns sentence segmentation and labelled dependency parsing, and can
@@ -114,6 +113,7 @@ def make_parser(
         beam_update_prob=0.0,
     )
 
+
 @Language.factory(
     "beam_parser",
     assigns=["token.dep", "token.head", "token.is_sent_start", "doc.sents"],
@@ -195,7 +195,7 @@ def make_beam_parser(
         beam_update_prob=beam_update_prob,
         multitasks=[],
         learn_tokens=learn_tokens,
-        min_action_freq=min_action_freq
+        min_action_freq=min_action_freq,
     )
 
 
@@ -204,6 +204,7 @@ class DependencyParser(Parser):
 
     DOCS: https://nightly.spacy.io/api/dependencyparser
     """
+
     TransitionSystem = ArcEager
 
     @property
@@ -245,16 +246,21 @@ class DependencyParser(Parser):
 
         DOCS: https://nightly.spacy.io/api/dependencyparser#score
        """
+
         def has_sents(doc):
             return doc.has_annotation("SENT_START")
 
         validate_examples(examples, "DependencyParser.score")
+
         def dep_getter(token, attr):
             dep = getattr(token, attr)
             dep = token.vocab.strings.as_string(dep).lower()
             return dep
+
         results = {}
-        results.update(Scorer.score_spans(examples, "sents", has_annotation=has_sents, **kwargs))
+        results.update(
+            Scorer.score_spans(examples, "sents", has_annotation=has_sents, **kwargs)
+        )
         kwargs.setdefault("getter", dep_getter)
         kwargs.setdefault("ignore_labels", ("p", "punct"))
         results.update(Scorer.score_deps(examples, "dep", **kwargs))
diff --git a/spacy/pipeline/ner.py b/spacy/pipeline/ner.py
index cd2f9e1cf..474dec9bd 100644
--- a/spacy/pipeline/ner.py
+++ b/spacy/pipeline/ner.py
@@ -13,12 +13,11 @@ from ..training import validate_examples
 
 default_model_config = """
 [model]
-@architectures = "spacy.TransitionBasedParser.v2"
+@architectures = "spacy.TransitionBasedParser.v3"
 state_type = "ner"
 extra_state_tokens = false
 hidden_width = 64
 maxout_pieces = 2
-use_upper = true
 
 [model.tok2vec]
 @architectures = "spacy.HashEmbedCNN.v1"
@@ -41,8 +40,12 @@ DEFAULT_NER_MODEL = Config().from_str(default_model_config)["model"]
         "update_with_oracle_cut_size": 100,
         "model": DEFAULT_NER_MODEL,
     },
-    default_score_weights={"ents_f": 1.0, "ents_p": 0.0, "ents_r": 0.0, "ents_per_type": None},
-
+    default_score_weights={
+        "ents_f": 1.0,
+        "ents_p": 0.0,
+        "ents_r": 0.0,
+        "ents_per_type": None,
+    },
 )
 def make_ner(
     nlp: Language,
@@ -89,6 +92,7 @@ def make_ner(
         beam_update_prob=0.0,
     )
 
+
 @Language.factory(
     "beam_ner",
     assigns=["doc.ents", "token.ent_iob", "token.ent_type"],
@@ -98,9 +102,14 @@ def make_ner(
         "model": DEFAULT_NER_MODEL,
         "beam_density": 0.01,
         "beam_update_prob": 0.5,
-        "beam_width": 32
+        "beam_width": 32,
+    },
+    default_score_weights={
+        "ents_f": 1.0,
+        "ents_p": 0.0,
+        "ents_r": 0.0,
+        "ents_per_type": None,
     },
-    default_score_weights={"ents_f": 1.0, "ents_p": 0.0, "ents_r": 0.0, "ents_per_type": None},
 )
 def make_beam_ner(
     nlp: Language,
@@ -165,6 +174,7 @@ class EntityRecognizer(Parser):
 
     DOCS: https://nightly.spacy.io/api/entityrecognizer
     """
+
     TransitionSystem = BiluoPushDown
 
     def add_multitask_objective(self, mt_component):
@@ -184,8 +194,11 @@ class EntityRecognizer(Parser):
     def labels(self):
         # Get the labels from the model by looking at the available moves, e.g.
         # B-PERSON, I-PERSON, L-PERSON, U-PERSON
-        labels = set(move.split("-")[1] for move in self.move_names
-                     if move[0] in ("B", "I", "L", "U"))
+        labels = set(
+            move.split("-")[1]
+            for move in self.move_names
+            if move[0] in ("B", "I", "L", "U")
+        )
         return tuple(sorted(labels))
 
     def score(self, examples, **kwargs):
diff --git a/spacy/tests/parser/test_ner.py b/spacy/tests/parser/test_ner.py
index b22d2deee..0ff5c5a66 100644
--- a/spacy/tests/parser/test_ner.py
+++ b/spacy/tests/parser/test_ner.py
@@ -246,7 +246,7 @@ def test_empty_ner():
 
 
 def test_ruler_before_ner():
-    """ Test that an NER works after an entity_ruler: the second can add annotations """
+    """Test that an NER works after an entity_ruler: the second can add annotations"""
     nlp = English()
 
     # 1 : Entity Ruler - should set "this" to B and everything else to empty
@@ -266,7 +266,7 @@ def test_ruler_before_ner():
 
 
 def test_ner_before_ruler():
-    """ Test that an entity_ruler works after an NER: the second can overwrite O annotations """
+    """Test that an entity_ruler works after an NER: the second can overwrite O annotations"""
     nlp = English()
 
     # 1: untrained NER - should set everything to O
@@ -287,7 +287,7 @@ def test_ner_before_ruler():
 
 
 def test_block_ner():
-    """ Test functionality for blocking tokens so they can't be in a named entity """
+    """Test functionality for blocking tokens so they can't be in a named entity"""
     # block "Antti L Korhonen" from being a named entity
     nlp = English()
     nlp.add_pipe("blocker", config={"start": 2, "end": 5})
@@ -301,11 +301,10 @@
     assert [token.ent_type_ for token in doc] == expected_types
 
 
-@pytest.mark.parametrize("use_upper", [True, False])
-def test_overfitting_IO(use_upper):
+def test_overfitting_IO():
     # Simple test to try and quickly overfit the NER component
     nlp = English()
-    ner = nlp.add_pipe("ner", config={"model": {"use_upper": use_upper}})
+    ner = nlp.add_pipe("ner")
     train_examples = []
     for text, annotations in TRAIN_DATA:
         train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))
@@ -337,7 +336,6 @@
     assert ents2[0].label_ == "LOC"
     # Ensure that the predictions are still the same, even after adding a new label
     ner2 = nlp2.get_pipe("ner")
-    assert ner2.model.attrs["has_upper"] == use_upper
     ner2.add_label("RANDOM_NEW_LABEL")
     doc3 = nlp2(test_text)
     ents3 = doc3.ents
diff --git a/website/docs/api/architectures.md b/website/docs/api/architectures.md
index d8f0ce022..b1f274252 100644
--- a/website/docs/api/architectures.md
+++ b/website/docs/api/architectures.md
@@ -494,18 +494,17 @@ for a Tok2Vec layer.
 
 ## Parser & NER architectures {#parser}
 
-### spacy.TransitionBasedParser.v2 {#TransitionBasedParser source="spacy/ml/models/parser.py"}
+### spacy.TransitionBasedParser.v3 {#TransitionBasedParser source="spacy/ml/models/parser.py"}
 
 > #### Example Config
 >
 > ```ini
 > [model]
-> @architectures = "spacy.TransitionBasedParser.v2"
+> @architectures = "spacy.TransitionBasedParser.v3"
 > state_type = "ner"
 > extra_state_tokens = false
 > hidden_width = 64
 > maxout_pieces = 2
-> use_upper = true
 >
 > [model.tok2vec]
 > @architectures = "spacy.HashEmbedCNN.v1"
@@ -535,16 +534,15 @@ consists of either two or three subnetworks:
   state representation. If not present, the output from the lower model is
   used as action scores directly.
 
-| Name                 | Description |
-| -------------------- | ----------- |
-| `tok2vec`            | Subnetwork to map tokens into vector representations. ~~Model[List[Doc], List[Floats2d]]~~ |
-| `state_type`         | Which task to extract features for. Possible values are "ner" and "parser". ~~str~~ |
-| `extra_state_tokens` | Whether to use an expanded feature set when extracting the state tokens. Slightly slower, but sometimes improves accuracy slightly. Defaults to `False`. ~~bool~~ |
-| `hidden_width`       | The width of the hidden layer. ~~int~~ |
-| `maxout_pieces`      | How many pieces to use in the state prediction layer. Recommended values are `1`, `2` or `3`. If `1`, the maxout non-linearity is replaced with a [`Relu`](https://thinc.ai/docs/api-layers#relu) non-linearity if `use_upper` is `True`, and no non-linearity if `False`. ~~int~~ |
-| `use_upper`          | Whether to use an additional hidden layer after the state vector in order to predict the action scores. It is recommended to set this to `False` for large pretrained models such as transformers, and `True` for smaller networks. The upper layer is computed on CPU, which becomes a bottleneck on larger GPU-based models, where it's also less necessary. ~~bool~~ |
-| `nO`                 | The number of actions the model will predict between. Usually inferred from data at the beginning of training, or loaded from disk. ~~int~~ |
-| **CREATES**          | The model using the architecture. ~~Model[List[Docs], List[List[Floats2d]]]~~ |
+| Name                 | Description |
+| -------------------- | ----------- |
+| `tok2vec`            | Subnetwork to map tokens into vector representations. ~~Model[List[Doc], List[Floats2d]]~~ |
+| `state_type`         | Which task to extract features for. Possible values are "ner" and "parser". ~~str~~ |
+| `extra_state_tokens` | Whether to use an expanded feature set when extracting the state tokens. Slightly slower, but sometimes improves accuracy slightly. Defaults to `False`. ~~bool~~ |
+| `hidden_width`       | The width of the hidden layer. ~~int~~ |
+| `maxout_pieces`      | How many pieces to use in the state prediction layer. Recommended values are `1`, `2` or `3`. ~~int~~ |
+| `nO`                 | The number of actions the model will predict between. Usually inferred from data at the beginning of training, or loaded from disk. ~~int~~ |
+| **CREATES**          | The model using the architecture. ~~Model[List[Docs], List[List[Floats2d]]]~~ |
 
 ## Tagging architectures {#tagger source="spacy/ml/models/tagger.py"}
 
diff --git a/website/docs/usage/embeddings-transformers.md b/website/docs/usage/embeddings-transformers.md
index fdf15d187..b39bc3eb3 100644
--- a/website/docs/usage/embeddings-transformers.md
+++ b/website/docs/usage/embeddings-transformers.md
@@ -141,7 +141,7 @@ factory = "tok2vec"
 factory = "ner"
 
 [components.ner.model]
-@architectures = "spacy.TransitionBasedParser.v1"
+@architectures = "spacy.TransitionBasedParser.v3"
 
 [components.ner.model.tok2vec]
 @architectures = "spacy.Tok2VecListener.v1"
@@ -158,7 +158,7 @@ same. This makes them fully independent and doesn't require an upstream
 factory = "ner"
 
 [components.ner.model]
-@architectures = "spacy.TransitionBasedParser.v1"
+@architectures = "spacy.TransitionBasedParser.v3"
 
 [components.ner.model.tok2vec]
 @architectures = "spacy.Tok2Vec.v2"
@@ -446,7 +446,7 @@ sneakily delegates to the `Transformer` pipeline component.
 factory = "ner"
 
 [nlp.pipeline.ner.model]
-@architectures = "spacy.TransitionBasedParser.v1"
+@architectures = "spacy.TransitionBasedParser.v3"
 state_type = "ner"
 extra_state_tokens = false
 hidden_width = 128
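
For reference, this is what a full parser model block looks like after the change. It is a minimal sketch assembled from the defaults this patch writes into the quickstart template; migrating an existing config from `spacy.TransitionBasedParser.v1` or `v2` amounts to bumping the version string and deleting the `use_upper` setting, since the upper layer no longer exists in v3:

```ini
[components.parser.model]
@architectures = "spacy.TransitionBasedParser.v3"
state_type = "parser"
extra_state_tokens = false
hidden_width = 128
maxout_pieces = 3
nO = null
```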