This commit is contained in:
svlandeg 2021-10-29 13:25:15 +02:00
parent 79d5957c47
commit 753f9ee685
7 changed files with 57 additions and 94 deletions

View File

@ -75,12 +75,11 @@ grad_factor = 1.0
factory = "parser" factory = "parser"
[components.parser.model] [components.parser.model]
@architectures = "spacy.TransitionBasedParser.v2" @architectures = "spacy.TransitionBasedParser.v3"
state_type = "parser" state_type = "parser"
extra_state_tokens = false extra_state_tokens = false
hidden_width = 128 hidden_width = 128
maxout_pieces = 3 maxout_pieces = 3
use_upper = false
nO = null nO = null
[components.parser.model.tok2vec] [components.parser.model.tok2vec]
@ -96,12 +95,11 @@ grad_factor = 1.0
factory = "ner" factory = "ner"
[components.ner.model] [components.ner.model]
@architectures = "spacy.TransitionBasedParser.v2" @architectures = "spacy.TransitionBasedParser.v3"
state_type = "ner" state_type = "ner"
extra_state_tokens = false extra_state_tokens = false
hidden_width = 64 hidden_width = 64
maxout_pieces = 2 maxout_pieces = 2
use_upper = false
nO = null nO = null
[components.ner.model.tok2vec] [components.ner.model.tok2vec]
@ -257,12 +255,11 @@ width = ${components.tok2vec.model.encode.width}
factory = "parser" factory = "parser"
[components.parser.model] [components.parser.model]
@architectures = "spacy.TransitionBasedParser.v2" @architectures = "spacy.TransitionBasedParser.v3"
state_type = "parser" state_type = "parser"
extra_state_tokens = false extra_state_tokens = false
hidden_width = 128 hidden_width = 128
maxout_pieces = 3 maxout_pieces = 3
use_upper = true
nO = null nO = null
[components.parser.model.tok2vec] [components.parser.model.tok2vec]
@ -275,12 +272,11 @@ width = ${components.tok2vec.model.encode.width}
factory = "ner" factory = "ner"
[components.ner.model] [components.ner.model]
@architectures = "spacy.TransitionBasedParser.v2" @architectures = "spacy.TransitionBasedParser.v3"
state_type = "ner" state_type = "ner"
extra_state_tokens = false extra_state_tokens = false
hidden_width = 64 hidden_width = 64
maxout_pieces = 2 maxout_pieces = 2
use_upper = true
nO = null nO = null
[components.ner.model.tok2vec] [components.ner.model.tok2vec]

View File

@ -12,49 +12,8 @@ TransitionSystem = Any # TODO
State = Any # TODO State = Any # TODO
@registry.architectures.register("spacy.TransitionBasedParser.v1")
def transition_parser_v1(
    tok2vec: Model[List[Doc], List[Floats2d]],
    state_type: Literal["parser", "ner"],
    extra_state_tokens: bool,
    hidden_width: int,
    maxout_pieces: int,
    use_upper: bool = True,
    nO: Optional[int] = None,
) -> Model[Tuple[List[Doc], TransitionSystem], List[Tuple[State, List[Floats2d]]]]:
    """Registry entry point for the "spacy.TransitionBasedParser.v1" architecture.

    This is a thin wrapper: all seven arguments are handed to
    build_tb_parser_model positionally, in the order they are declared here,
    and whatever that builder returns is returned unchanged.

    tok2vec (Model[List[Doc], List[Floats2d]]): Subnetwork to map tokens into
        vector representations.
    state_type (str): Which task to extract features for, "parser" or "ner".
    extra_state_tokens (bool): Whether to use an expanded feature set when
        extracting the state tokens.
    hidden_width (int): The width of the hidden layer.
    maxout_pieces (int): How many pieces to use in the state prediction layer.
    use_upper (bool): Forwarded as-is to the builder. Defaults to True.
    nO (int or None): The number of actions the model will predict between.
    RETURNS (Model): The model produced by build_tb_parser_model.
    """
    # Everything is passed positionally, so the order of this tuple matters.
    builder_args = (
        tok2vec,
        state_type,
        extra_state_tokens,
        hidden_width,
        maxout_pieces,
        use_upper,
        nO,
    )
    return build_tb_parser_model(*builder_args)
@registry.architectures.register("spacy.TransitionBasedParser.v2")
def transition_parser_v2(
    tok2vec: Model[List[Doc], List[Floats2d]],
    state_type: Literal["parser", "ner"],
    extra_state_tokens: bool,
    hidden_width: int,
    maxout_pieces: int,
    use_upper: bool,
    nO: Optional[int] = None,
) -> Model:
    """Registry entry point for the "spacy.TransitionBasedParser.v2" architecture.

    Delegates to build_tb_parser_model and returns its result unchanged. The
    first five arguments are passed positionally and nO is passed by keyword.

    tok2vec (Model[List[Doc], List[Floats2d]]): Subnetwork to map tokens into
        vector representations.
    state_type (str): Which task to extract features for, "parser" or "ner".
    extra_state_tokens (bool): Whether to use an expanded feature set when
        extracting the state tokens.
    hidden_width (int): The width of the hidden layer.
    maxout_pieces (int): How many pieces to use in the state prediction layer.
    use_upper (bool): Required by the signature but NOT forwarded to the
        builder; its value has no effect inside this function.
    nO (int or None): The number of actions the model will predict between.
    RETURNS (Model): The model produced by build_tb_parser_model.
    """
    # use_upper is intentionally absent from the forwarded arguments.
    positional_args = (
        tok2vec,
        state_type,
        extra_state_tokens,
        hidden_width,
        maxout_pieces,
    )
    return build_tb_parser_model(*positional_args, nO=nO)
@registry.architectures.register("spacy.TransitionBasedParser.v3") @registry.architectures.register("spacy.TransitionBasedParser.v3")
def transition_parser_v2( def transition_parser_v3(
tok2vec: Model[List[Doc], List[Floats2d]], tok2vec: Model[List[Doc], List[Floats2d]],
state_type: Literal["parser", "ner"], state_type: Literal["parser", "ner"],
extra_state_tokens: bool, extra_state_tokens: bool,
@ -111,14 +70,7 @@ def build_tb_parser_model(
feature sets (for the NER) or 13 (for the parser). feature sets (for the NER) or 13 (for the parser).
hidden_width (int): The width of the hidden layer. hidden_width (int): The width of the hidden layer.
maxout_pieces (int): How many pieces to use in the state prediction layer. maxout_pieces (int): How many pieces to use in the state prediction layer.
Recommended values are 1, 2 or 3. If 1, the maxout non-linearity Recommended values are 1, 2 or 3.
is replaced with a ReLU non-linearity if use_upper=True, and no
non-linearity if use_upper=False.
use_upper (bool): Whether to use an additional hidden layer after the state
vector in order to predict the action scores. It is recommended to set
this to False for large pretrained models such as transformers, and True
for smaller networks. The upper layer is computed on CPU, which becomes
a bottleneck on larger GPU-based models, where it's also less necessary.
nO (int or None): The number of actions the model will predict between. nO (int or None): The number of actions the model will predict between.
Usually inferred from data at the beginning of training, or loaded from Usually inferred from data at the beginning of training, or loaded from
disk. disk.

View File

@ -16,12 +16,11 @@ from ..training import validate_examples
default_model_config = """ default_model_config = """
[model] [model]
@architectures = "spacy.TransitionBasedParser.v2" @architectures = "spacy.TransitionBasedParser.v3"
state_type = "parser" state_type = "parser"
extra_state_tokens = false extra_state_tokens = false
hidden_width = 64 hidden_width = 64
maxout_pieces = 2 maxout_pieces = 2
use_upper = true
[model.tok2vec] [model.tok2vec]
@architectures = "spacy.HashEmbedCNN.v1" @architectures = "spacy.HashEmbedCNN.v1"
@ -62,7 +61,7 @@ def make_parser(
moves: Optional[list], moves: Optional[list],
update_with_oracle_cut_size: int, update_with_oracle_cut_size: int,
learn_tokens: bool, learn_tokens: bool,
min_action_freq: int min_action_freq: int,
): ):
"""Create a transition-based DependencyParser component. The dependency parser """Create a transition-based DependencyParser component. The dependency parser
jointly learns sentence segmentation and labelled dependency parsing, and can jointly learns sentence segmentation and labelled dependency parsing, and can
@ -114,6 +113,7 @@ def make_parser(
beam_update_prob=0.0, beam_update_prob=0.0,
) )
@Language.factory( @Language.factory(
"beam_parser", "beam_parser",
assigns=["token.dep", "token.head", "token.is_sent_start", "doc.sents"], assigns=["token.dep", "token.head", "token.is_sent_start", "doc.sents"],
@ -195,7 +195,7 @@ def make_beam_parser(
beam_update_prob=beam_update_prob, beam_update_prob=beam_update_prob,
multitasks=[], multitasks=[],
learn_tokens=learn_tokens, learn_tokens=learn_tokens,
min_action_freq=min_action_freq min_action_freq=min_action_freq,
) )
@ -204,6 +204,7 @@ class DependencyParser(Parser):
DOCS: https://nightly.spacy.io/api/dependencyparser DOCS: https://nightly.spacy.io/api/dependencyparser
""" """
TransitionSystem = ArcEager TransitionSystem = ArcEager
@property @property
@ -245,16 +246,21 @@ class DependencyParser(Parser):
DOCS: https://nightly.spacy.io/api/dependencyparser#score DOCS: https://nightly.spacy.io/api/dependencyparser#score
""" """
def has_sents(doc): def has_sents(doc):
return doc.has_annotation("SENT_START") return doc.has_annotation("SENT_START")
validate_examples(examples, "DependencyParser.score") validate_examples(examples, "DependencyParser.score")
def dep_getter(token, attr): def dep_getter(token, attr):
dep = getattr(token, attr) dep = getattr(token, attr)
dep = token.vocab.strings.as_string(dep).lower() dep = token.vocab.strings.as_string(dep).lower()
return dep return dep
results = {} results = {}
results.update(Scorer.score_spans(examples, "sents", has_annotation=has_sents, **kwargs)) results.update(
Scorer.score_spans(examples, "sents", has_annotation=has_sents, **kwargs)
)
kwargs.setdefault("getter", dep_getter) kwargs.setdefault("getter", dep_getter)
kwargs.setdefault("ignore_labels", ("p", "punct")) kwargs.setdefault("ignore_labels", ("p", "punct"))
results.update(Scorer.score_deps(examples, "dep", **kwargs)) results.update(Scorer.score_deps(examples, "dep", **kwargs))

View File

@ -13,12 +13,11 @@ from ..training import validate_examples
default_model_config = """ default_model_config = """
[model] [model]
@architectures = "spacy.TransitionBasedParser.v2" @architectures = "spacy.TransitionBasedParser.v3"
state_type = "ner" state_type = "ner"
extra_state_tokens = false extra_state_tokens = false
hidden_width = 64 hidden_width = 64
maxout_pieces = 2 maxout_pieces = 2
use_upper = true
[model.tok2vec] [model.tok2vec]
@architectures = "spacy.HashEmbedCNN.v1" @architectures = "spacy.HashEmbedCNN.v1"
@ -41,8 +40,12 @@ DEFAULT_NER_MODEL = Config().from_str(default_model_config)["model"]
"update_with_oracle_cut_size": 100, "update_with_oracle_cut_size": 100,
"model": DEFAULT_NER_MODEL, "model": DEFAULT_NER_MODEL,
}, },
default_score_weights={"ents_f": 1.0, "ents_p": 0.0, "ents_r": 0.0, "ents_per_type": None}, default_score_weights={
"ents_f": 1.0,
"ents_p": 0.0,
"ents_r": 0.0,
"ents_per_type": None,
},
) )
def make_ner( def make_ner(
nlp: Language, nlp: Language,
@ -89,6 +92,7 @@ def make_ner(
beam_update_prob=0.0, beam_update_prob=0.0,
) )
@Language.factory( @Language.factory(
"beam_ner", "beam_ner",
assigns=["doc.ents", "token.ent_iob", "token.ent_type"], assigns=["doc.ents", "token.ent_iob", "token.ent_type"],
@ -98,9 +102,14 @@ def make_ner(
"model": DEFAULT_NER_MODEL, "model": DEFAULT_NER_MODEL,
"beam_density": 0.01, "beam_density": 0.01,
"beam_update_prob": 0.5, "beam_update_prob": 0.5,
"beam_width": 32 "beam_width": 32,
},
default_score_weights={
"ents_f": 1.0,
"ents_p": 0.0,
"ents_r": 0.0,
"ents_per_type": None,
}, },
default_score_weights={"ents_f": 1.0, "ents_p": 0.0, "ents_r": 0.0, "ents_per_type": None},
) )
def make_beam_ner( def make_beam_ner(
nlp: Language, nlp: Language,
@ -165,6 +174,7 @@ class EntityRecognizer(Parser):
DOCS: https://nightly.spacy.io/api/entityrecognizer DOCS: https://nightly.spacy.io/api/entityrecognizer
""" """
TransitionSystem = BiluoPushDown TransitionSystem = BiluoPushDown
def add_multitask_objective(self, mt_component): def add_multitask_objective(self, mt_component):
@ -184,8 +194,11 @@ class EntityRecognizer(Parser):
def labels(self): def labels(self):
# Get the labels from the model by looking at the available moves, e.g. # Get the labels from the model by looking at the available moves, e.g.
# B-PERSON, I-PERSON, L-PERSON, U-PERSON # B-PERSON, I-PERSON, L-PERSON, U-PERSON
labels = set(move.split("-")[1] for move in self.move_names labels = set(
if move[0] in ("B", "I", "L", "U")) move.split("-")[1]
for move in self.move_names
if move[0] in ("B", "I", "L", "U")
)
return tuple(sorted(labels)) return tuple(sorted(labels))
def score(self, examples, **kwargs): def score(self, examples, **kwargs):

View File

@ -246,7 +246,7 @@ def test_empty_ner():
def test_ruler_before_ner(): def test_ruler_before_ner():
""" Test that an NER works after an entity_ruler: the second can add annotations """ """Test that an NER works after an entity_ruler: the second can add annotations"""
nlp = English() nlp = English()
# 1 : Entity Ruler - should set "this" to B and everything else to empty # 1 : Entity Ruler - should set "this" to B and everything else to empty
@ -266,7 +266,7 @@ def test_ruler_before_ner():
def test_ner_before_ruler(): def test_ner_before_ruler():
""" Test that an entity_ruler works after an NER: the second can overwrite O annotations """ """Test that an entity_ruler works after an NER: the second can overwrite O annotations"""
nlp = English() nlp = English()
# 1: untrained NER - should set everything to O # 1: untrained NER - should set everything to O
@ -287,7 +287,7 @@ def test_ner_before_ruler():
def test_block_ner(): def test_block_ner():
""" Test functionality for blocking tokens so they can't be in a named entity """ """Test functionality for blocking tokens so they can't be in a named entity"""
# block "Antti L Korhonen" from being a named entity # block "Antti L Korhonen" from being a named entity
nlp = English() nlp = English()
nlp.add_pipe("blocker", config={"start": 2, "end": 5}) nlp.add_pipe("blocker", config={"start": 2, "end": 5})
@ -301,11 +301,10 @@ def test_block_ner():
assert [token.ent_type_ for token in doc] == expected_types assert [token.ent_type_ for token in doc] == expected_types
@pytest.mark.parametrize("use_upper", [True, False]) def test_overfitting_IO():
def test_overfitting_IO(use_upper):
# Simple test to try and quickly overfit the NER component # Simple test to try and quickly overfit the NER component
nlp = English() nlp = English()
ner = nlp.add_pipe("ner", config={"model": {"use_upper": use_upper}}) ner = nlp.add_pipe("ner")
train_examples = [] train_examples = []
for text, annotations in TRAIN_DATA: for text, annotations in TRAIN_DATA:
train_examples.append(Example.from_dict(nlp.make_doc(text), annotations)) train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))
@ -337,7 +336,6 @@ def test_overfitting_IO(use_upper):
assert ents2[0].label_ == "LOC" assert ents2[0].label_ == "LOC"
# Ensure that the predictions are still the same, even after adding a new label # Ensure that the predictions are still the same, even after adding a new label
ner2 = nlp2.get_pipe("ner") ner2 = nlp2.get_pipe("ner")
assert ner2.model.attrs["has_upper"] == use_upper
ner2.add_label("RANDOM_NEW_LABEL") ner2.add_label("RANDOM_NEW_LABEL")
doc3 = nlp2(test_text) doc3 = nlp2(test_text)
ents3 = doc3.ents ents3 = doc3.ents

View File

@ -494,18 +494,17 @@ for a Tok2Vec layer.
## Parser & NER architectures {#parser} ## Parser & NER architectures {#parser}
### spacy.TransitionBasedParser.v2 {#TransitionBasedParser source="spacy/ml/models/parser.py"} ### spacy.TransitionBasedParser.v3 {#TransitionBasedParser source="spacy/ml/models/parser.py"}
> #### Example Config > #### Example Config
> >
> ```ini > ```ini
> [model] > [model]
> @architectures = "spacy.TransitionBasedParser.v2" > @architectures = "spacy.TransitionBasedParser.v3"
> state_type = "ner" > state_type = "ner"
> extra_state_tokens = false > extra_state_tokens = false
> hidden_width = 64 > hidden_width = 64
> maxout_pieces = 2 > maxout_pieces = 2
> use_upper = true
> >
> [model.tok2vec] > [model.tok2vec]
> @architectures = "spacy.HashEmbedCNN.v1" > @architectures = "spacy.HashEmbedCNN.v1"
@ -535,16 +534,15 @@ consists of either two or three subnetworks:
state representation. If not present, the output from the lower model is used state representation. If not present, the output from the lower model is used
as action scores directly. as action scores directly.
| Name | Description | | Name | Description |
| -------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | -------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `tok2vec` | Subnetwork to map tokens into vector representations. ~~Model[List[Doc], List[Floats2d]]~~ | | `tok2vec` | Subnetwork to map tokens into vector representations. ~~Model[List[Doc], List[Floats2d]]~~ |
| `state_type` | Which task to extract features for. Possible values are "ner" and "parser". ~~str~~ | | `state_type` | Which task to extract features for. Possible values are "ner" and "parser". ~~str~~ |
| `extra_state_tokens` | Whether to use an expanded feature set when extracting the state tokens. Slightly slower, but sometimes improves accuracy slightly. Defaults to `False`. ~~bool~~ | | `extra_state_tokens` | Whether to use an expanded feature set when extracting the state tokens. Slightly slower, but sometimes improves accuracy slightly. Defaults to `False`. ~~bool~~ |
| `hidden_width` | The width of the hidden layer. ~~int~~ | | `hidden_width` | The width of the hidden layer. ~~int~~ |
| `maxout_pieces` | How many pieces to use in the state prediction layer. Recommended values are `1`, `2` or `3`. If `1`, the maxout non-linearity is replaced with a [`Relu`](https://thinc.ai/docs/api-layers#relu) non-linearity if `use_upper` is `True`, and no non-linearity if `False`. ~~int~~ | | `maxout_pieces` | How many pieces to use in the state prediction layer. Recommended values are `1`, `2` or `3`. ~~int~~ |
| `use_upper` | Whether to use an additional hidden layer after the state vector in order to predict the action scores. It is recommended to set this to `False` for large pretrained models such as transformers, and `True` for smaller networks. The upper layer is computed on CPU, which becomes a bottleneck on larger GPU-based models, where it's also less necessary. ~~bool~~ | | `nO` | The number of actions the model will predict between. Usually inferred from data at the beginning of training, or loaded from disk. ~~int~~ |
| `nO` | The number of actions the model will predict between. Usually inferred from data at the beginning of training, or loaded from disk. ~~int~~ | | **CREATES** | The model using the architecture. ~~Model[List[Doc], List[List[Floats2d]]]~~ |
| **CREATES** | The model using the architecture. ~~Model[List[Doc], List[List[Floats2d]]]~~ |
## Tagging architectures {#tagger source="spacy/ml/models/tagger.py"} ## Tagging architectures {#tagger source="spacy/ml/models/tagger.py"}

View File

@ -141,7 +141,7 @@ factory = "tok2vec"
factory = "ner" factory = "ner"
[components.ner.model] [components.ner.model]
@architectures = "spacy.TransitionBasedParser.v1" @architectures = "spacy.TransitionBasedParser.v3"
[components.ner.model.tok2vec] [components.ner.model.tok2vec]
@architectures = "spacy.Tok2VecListener.v1" @architectures = "spacy.Tok2VecListener.v1"
@ -158,7 +158,7 @@ same. This makes them fully independent and doesn't require an upstream
factory = "ner" factory = "ner"
[components.ner.model] [components.ner.model]
@architectures = "spacy.TransitionBasedParser.v1" @architectures = "spacy.TransitionBasedParser.v3"
[components.ner.model.tok2vec] [components.ner.model.tok2vec]
@architectures = "spacy.Tok2Vec.v2" @architectures = "spacy.Tok2Vec.v2"
@ -446,7 +446,7 @@ sneakily delegates to the `Transformer` pipeline component.
factory = "ner" factory = "ner"
[nlp.pipeline.ner.model] [nlp.pipeline.ner.model]
@architectures = "spacy.TransitionBasedParser.v1" @architectures = "spacy.TransitionBasedParser.v3"
state_type = "ner" state_type = "ner"
extra_state_tokens = false extra_state_tokens = false
hidden_width = 128 hidden_width = 128