This commit is contained in:
svlandeg 2021-10-29 13:25:15 +02:00
parent 79d5957c47
commit 753f9ee685
7 changed files with 57 additions and 94 deletions

View File

@ -75,12 +75,11 @@ grad_factor = 1.0
factory = "parser"
[components.parser.model]
@architectures = "spacy.TransitionBasedParser.v2"
@architectures = "spacy.TransitionBasedParser.v3"
state_type = "parser"
extra_state_tokens = false
hidden_width = 128
maxout_pieces = 3
use_upper = false
nO = null
[components.parser.model.tok2vec]
@ -96,12 +95,11 @@ grad_factor = 1.0
factory = "ner"
[components.ner.model]
@architectures = "spacy.TransitionBasedParser.v2"
@architectures = "spacy.TransitionBasedParser.v3"
state_type = "ner"
extra_state_tokens = false
hidden_width = 64
maxout_pieces = 2
use_upper = false
nO = null
[components.ner.model.tok2vec]
@ -257,12 +255,11 @@ width = ${components.tok2vec.model.encode.width}
factory = "parser"
[components.parser.model]
@architectures = "spacy.TransitionBasedParser.v2"
@architectures = "spacy.TransitionBasedParser.v3"
state_type = "parser"
extra_state_tokens = false
hidden_width = 128
maxout_pieces = 3
use_upper = true
nO = null
[components.parser.model.tok2vec]
@ -275,12 +272,11 @@ width = ${components.tok2vec.model.encode.width}
factory = "ner"
[components.ner.model]
@architectures = "spacy.TransitionBasedParser.v2"
@architectures = "spacy.TransitionBasedParser.v3"
state_type = "ner"
extra_state_tokens = false
hidden_width = 64
maxout_pieces = 2
use_upper = true
nO = null
[components.ner.model.tok2vec]

View File

@ -12,49 +12,8 @@ TransitionSystem = Any # TODO
State = Any # TODO
@registry.architectures.register("spacy.TransitionBasedParser.v1")
def transition_parser_v1(
tok2vec: Model[List[Doc], List[Floats2d]],
state_type: Literal["parser", "ner"],
extra_state_tokens: bool,
hidden_width: int,
maxout_pieces: int,
use_upper: bool = True,
nO: Optional[int] = None,
) -> Model[Tuple[List[Doc], TransitionSystem], List[Tuple[State, List[Floats2d]]]]:
return build_tb_parser_model(
tok2vec,
state_type,
extra_state_tokens,
hidden_width,
maxout_pieces,
use_upper,
nO,
)
@registry.architectures.register("spacy.TransitionBasedParser.v2")
def transition_parser_v2(
tok2vec: Model[List[Doc], List[Floats2d]],
state_type: Literal["parser", "ner"],
extra_state_tokens: bool,
hidden_width: int,
maxout_pieces: int,
use_upper: bool,
nO: Optional[int] = None,
) -> Model:
return build_tb_parser_model(
tok2vec,
state_type,
extra_state_tokens,
hidden_width,
maxout_pieces,
nO=nO,
)
@registry.architectures.register("spacy.TransitionBasedParser.v3")
def transition_parser_v2(
def transition_parser_v3(
tok2vec: Model[List[Doc], List[Floats2d]],
state_type: Literal["parser", "ner"],
extra_state_tokens: bool,
@ -111,14 +70,7 @@ def build_tb_parser_model(
feature sets (for the NER) or 13 (for the parser).
hidden_width (int): The width of the hidden layer.
maxout_pieces (int): How many pieces to use in the state prediction layer.
Recommended values are 1, 2 or 3. If 1, the maxout non-linearity
is replaced with a ReLu non-linearity if use_upper=True, and no
non-linearity if use_upper=False.
use_upper (bool): Whether to use an additional hidden layer after the state
vector in order to predict the action scores. It is recommended to set
this to False for large pretrained models such as transformers, and False
for smaller networks. The upper layer is computed on CPU, which becomes
a bottleneck on larger GPU-based models, where it's also less necessary.
Recommended values are 1, 2 or 3.
nO (int or None): The number of actions the model will predict between.
Usually inferred from data at the beginning of training, or loaded from
disk.

View File

@ -16,12 +16,11 @@ from ..training import validate_examples
default_model_config = """
[model]
@architectures = "spacy.TransitionBasedParser.v2"
@architectures = "spacy.TransitionBasedParser.v3"
state_type = "parser"
extra_state_tokens = false
hidden_width = 64
maxout_pieces = 2
use_upper = true
[model.tok2vec]
@architectures = "spacy.HashEmbedCNN.v1"
@ -62,7 +61,7 @@ def make_parser(
moves: Optional[list],
update_with_oracle_cut_size: int,
learn_tokens: bool,
min_action_freq: int
min_action_freq: int,
):
"""Create a transition-based DependencyParser component. The dependency parser
jointly learns sentence segmentation and labelled dependency parsing, and can
@ -114,6 +113,7 @@ def make_parser(
beam_update_prob=0.0,
)
@Language.factory(
"beam_parser",
assigns=["token.dep", "token.head", "token.is_sent_start", "doc.sents"],
@ -195,7 +195,7 @@ def make_beam_parser(
beam_update_prob=beam_update_prob,
multitasks=[],
learn_tokens=learn_tokens,
min_action_freq=min_action_freq
min_action_freq=min_action_freq,
)
@ -204,6 +204,7 @@ class DependencyParser(Parser):
DOCS: https://nightly.spacy.io/api/dependencyparser
"""
TransitionSystem = ArcEager
@property
@ -245,16 +246,21 @@ class DependencyParser(Parser):
DOCS: https://nightly.spacy.io/api/dependencyparser#score
"""
def has_sents(doc):
return doc.has_annotation("SENT_START")
validate_examples(examples, "DependencyParser.score")
def dep_getter(token, attr):
dep = getattr(token, attr)
dep = token.vocab.strings.as_string(dep).lower()
return dep
results = {}
results.update(Scorer.score_spans(examples, "sents", has_annotation=has_sents, **kwargs))
results.update(
Scorer.score_spans(examples, "sents", has_annotation=has_sents, **kwargs)
)
kwargs.setdefault("getter", dep_getter)
kwargs.setdefault("ignore_labels", ("p", "punct"))
results.update(Scorer.score_deps(examples, "dep", **kwargs))

View File

@ -13,12 +13,11 @@ from ..training import validate_examples
default_model_config = """
[model]
@architectures = "spacy.TransitionBasedParser.v2"
@architectures = "spacy.TransitionBasedParser.v3"
state_type = "ner"
extra_state_tokens = false
hidden_width = 64
maxout_pieces = 2
use_upper = true
[model.tok2vec]
@architectures = "spacy.HashEmbedCNN.v1"
@ -41,8 +40,12 @@ DEFAULT_NER_MODEL = Config().from_str(default_model_config)["model"]
"update_with_oracle_cut_size": 100,
"model": DEFAULT_NER_MODEL,
},
default_score_weights={"ents_f": 1.0, "ents_p": 0.0, "ents_r": 0.0, "ents_per_type": None},
default_score_weights={
"ents_f": 1.0,
"ents_p": 0.0,
"ents_r": 0.0,
"ents_per_type": None,
},
)
def make_ner(
nlp: Language,
@ -89,6 +92,7 @@ def make_ner(
beam_update_prob=0.0,
)
@Language.factory(
"beam_ner",
assigns=["doc.ents", "token.ent_iob", "token.ent_type"],
@ -98,9 +102,14 @@ def make_ner(
"model": DEFAULT_NER_MODEL,
"beam_density": 0.01,
"beam_update_prob": 0.5,
"beam_width": 32
"beam_width": 32,
},
default_score_weights={
"ents_f": 1.0,
"ents_p": 0.0,
"ents_r": 0.0,
"ents_per_type": None,
},
default_score_weights={"ents_f": 1.0, "ents_p": 0.0, "ents_r": 0.0, "ents_per_type": None},
)
def make_beam_ner(
nlp: Language,
@ -165,6 +174,7 @@ class EntityRecognizer(Parser):
DOCS: https://nightly.spacy.io/api/entityrecognizer
"""
TransitionSystem = BiluoPushDown
def add_multitask_objective(self, mt_component):
@ -184,8 +194,11 @@ class EntityRecognizer(Parser):
def labels(self):
# Get the labels from the model by looking at the available moves, e.g.
# B-PERSON, I-PERSON, L-PERSON, U-PERSON
labels = set(move.split("-")[1] for move in self.move_names
if move[0] in ("B", "I", "L", "U"))
labels = set(
move.split("-")[1]
for move in self.move_names
if move[0] in ("B", "I", "L", "U")
)
return tuple(sorted(labels))
def score(self, examples, **kwargs):

View File

@ -301,11 +301,10 @@ def test_block_ner():
assert [token.ent_type_ for token in doc] == expected_types
@pytest.mark.parametrize("use_upper", [True, False])
def test_overfitting_IO(use_upper):
def test_overfitting_IO():
# Simple test to try and quickly overfit the NER component
nlp = English()
ner = nlp.add_pipe("ner", config={"model": {"use_upper": use_upper}})
ner = nlp.add_pipe("ner")
train_examples = []
for text, annotations in TRAIN_DATA:
train_examples.append(Example.from_dict(nlp.make_doc(text), annotations))
@ -337,7 +336,6 @@ def test_overfitting_IO(use_upper):
assert ents2[0].label_ == "LOC"
# Ensure that the predictions are still the same, even after adding a new label
ner2 = nlp2.get_pipe("ner")
assert ner2.model.attrs["has_upper"] == use_upper
ner2.add_label("RANDOM_NEW_LABEL")
doc3 = nlp2(test_text)
ents3 = doc3.ents

View File

@ -494,18 +494,17 @@ for a Tok2Vec layer.
## Parser & NER architectures {#parser}
### spacy.TransitionBasedParser.v2 {#TransitionBasedParser source="spacy/ml/models/parser.py"}
### spacy.TransitionBasedParser.v3 {#TransitionBasedParser source="spacy/ml/models/parser.py"}
> #### Example Config
>
> ```ini
> [model]
> @architectures = "spacy.TransitionBasedParser.v2"
> @architectures = "spacy.TransitionBasedParser.v3"
> state_type = "ner"
> extra_state_tokens = false
> hidden_width = 64
> maxout_pieces = 2
> use_upper = true
>
> [model.tok2vec]
> @architectures = "spacy.HashEmbedCNN.v1"
@ -536,13 +535,12 @@ consists of either two or three subnetworks:
as action scores directly.
| Name | Description |
| -------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| -------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `tok2vec` | Subnetwork to map tokens into vector representations. ~~Model[List[Doc], List[Floats2d]]~~ |
| `state_type` | Which task to extract features for. Possible values are "ner" and "parser". ~~str~~ |
| `extra_state_tokens` | Whether to use an expanded feature set when extracting the state tokens. Slightly slower, but sometimes improves accuracy slightly. Defaults to `False`. ~~bool~~ |
| `hidden_width` | The width of the hidden layer. ~~int~~ |
| `maxout_pieces` | How many pieces to use in the state prediction layer. Recommended values are `1`, `2` or `3`. If `1`, the maxout non-linearity is replaced with a [`Relu`](https://thinc.ai/docs/api-layers#relu) non-linearity if `use_upper` is `True`, and no non-linearity if `False`. ~~int~~ |
| `use_upper` | Whether to use an additional hidden layer after the state vector in order to predict the action scores. It is recommended to set this to `False` for large pretrained models such as transformers, and `True` for smaller networks. The upper layer is computed on CPU, which becomes a bottleneck on larger GPU-based models, where it's also less necessary. ~~bool~~ |
| `maxout_pieces` | How many pieces to use in the state prediction layer. Recommended values are `1`, `2` or `3`. ~~int~~ |
| `nO` | The number of actions the model will predict between. Usually inferred from data at the beginning of training, or loaded from disk. ~~int~~ |
| **CREATES** | The model using the architecture. ~~Model[List[Docs], List[List[Floats2d]]]~~ |

View File

@ -141,7 +141,7 @@ factory = "tok2vec"
factory = "ner"
[components.ner.model]
@architectures = "spacy.TransitionBasedParser.v1"
@architectures = "spacy.TransitionBasedParser.v3"
[components.ner.model.tok2vec]
@architectures = "spacy.Tok2VecListener.v1"
@ -158,7 +158,7 @@ same. This makes them fully independent and doesn't require an upstream
factory = "ner"
[components.ner.model]
@architectures = "spacy.TransitionBasedParser.v1"
@architectures = "spacy.TransitionBasedParser.v3"
[components.ner.model.tok2vec]
@architectures = "spacy.Tok2Vec.v2"
@ -446,7 +446,7 @@ sneakily delegates to the `Transformer` pipeline component.
factory = "ner"
[nlp.pipeline.ner.model]
@architectures = "spacy.TransitionBasedParser.v1"
@architectures = "spacy.TransitionBasedParser.v3"
state_type = "ner"
extra_state_tokens = false
hidden_width = 128