mirror of
https://github.com/explosion/spaCy.git
synced 2025-07-15 10:42:34 +03:00
update morphologizer, tagger test
This commit is contained in:
parent
0b3480cf3f
commit
36634d2adf
|
@ -69,7 +69,6 @@ grad_factor = 1.0
|
||||||
{% if "tagger" in components %}
|
{% if "tagger" in components %}
|
||||||
[components.tagger]
|
[components.tagger]
|
||||||
factory = "tagger"
|
factory = "tagger"
|
||||||
label_smoothing = 0.05
|
|
||||||
|
|
||||||
[components.tagger.model]
|
[components.tagger.model]
|
||||||
@architectures = "spacy.Tagger.v2"
|
@architectures = "spacy.Tagger.v2"
|
||||||
|
@ -287,6 +286,7 @@ maxout_pieces = 3
|
||||||
{% if "morphologizer" in components %}
|
{% if "morphologizer" in components %}
|
||||||
[components.morphologizer]
|
[components.morphologizer]
|
||||||
factory = "morphologizer"
|
factory = "morphologizer"
|
||||||
|
label_smoothing = 0.05
|
||||||
|
|
||||||
[components.morphologizer.model]
|
[components.morphologizer.model]
|
||||||
@architectures = "spacy.Tagger.v2"
|
@architectures = "spacy.Tagger.v2"
|
||||||
|
@ -300,6 +300,7 @@ width = ${components.tok2vec.model.encode.width}
|
||||||
{% if "tagger" in components %}
|
{% if "tagger" in components %}
|
||||||
[components.tagger]
|
[components.tagger]
|
||||||
factory = "tagger"
|
factory = "tagger"
|
||||||
|
label_smoothing = 0.05
|
||||||
|
|
||||||
[components.tagger.model]
|
[components.tagger.model]
|
||||||
@architectures = "spacy.Tagger.v2"
|
@architectures = "spacy.Tagger.v2"
|
||||||
|
|
|
@ -52,7 +52,8 @@ DEFAULT_MORPH_MODEL = Config().from_str(default_model_config)["model"]
|
||||||
@Language.factory(
|
@Language.factory(
|
||||||
"morphologizer",
|
"morphologizer",
|
||||||
assigns=["token.morph", "token.pos"],
|
assigns=["token.morph", "token.pos"],
|
||||||
default_config={"model": DEFAULT_MORPH_MODEL, "overwrite": True, "extend": False, "scorer": {"@scorers": "spacy.morphologizer_scorer.v1"}},
|
default_config={"model": DEFAULT_MORPH_MODEL, "overwrite": True, "extend": False,
|
||||||
|
"scorer": {"@scorers": "spacy.morphologizer_scorer.v1"}, "label_smoothing": 0.0},
|
||||||
default_score_weights={"pos_acc": 0.5, "morph_acc": 0.5, "morph_per_feat": None},
|
default_score_weights={"pos_acc": 0.5, "morph_acc": 0.5, "morph_per_feat": None},
|
||||||
)
|
)
|
||||||
def make_morphologizer(
|
def make_morphologizer(
|
||||||
|
@ -61,9 +62,10 @@ def make_morphologizer(
|
||||||
name: str,
|
name: str,
|
||||||
overwrite: bool,
|
overwrite: bool,
|
||||||
extend: bool,
|
extend: bool,
|
||||||
|
label_smoothing: float,
|
||||||
scorer: Optional[Callable],
|
scorer: Optional[Callable],
|
||||||
):
|
):
|
||||||
return Morphologizer(nlp.vocab, model, name, overwrite=overwrite, extend=extend, scorer=scorer)
|
return Morphologizer(nlp.vocab, model, name, overwrite=overwrite, extend=extend, label_smoothing=label_smoothing, scorer=scorer)
|
||||||
|
|
||||||
|
|
||||||
def morphologizer_score(examples, **kwargs):
|
def morphologizer_score(examples, **kwargs):
|
||||||
|
@ -94,6 +96,7 @@ class Morphologizer(Tagger):
|
||||||
*,
|
*,
|
||||||
overwrite: bool = BACKWARD_OVERWRITE,
|
overwrite: bool = BACKWARD_OVERWRITE,
|
||||||
extend: bool = BACKWARD_EXTEND,
|
extend: bool = BACKWARD_EXTEND,
|
||||||
|
label_smoothing: float = 0.0,
|
||||||
scorer: Optional[Callable] = morphologizer_score,
|
scorer: Optional[Callable] = morphologizer_score,
|
||||||
):
|
):
|
||||||
"""Initialize a morphologizer.
|
"""Initialize a morphologizer.
|
||||||
|
@ -121,6 +124,7 @@ class Morphologizer(Tagger):
|
||||||
"labels_pos": {},
|
"labels_pos": {},
|
||||||
"overwrite": overwrite,
|
"overwrite": overwrite,
|
||||||
"extend": extend,
|
"extend": extend,
|
||||||
|
"label_smoothing": label_smoothing,
|
||||||
}
|
}
|
||||||
self.cfg = dict(sorted(cfg.items()))
|
self.cfg = dict(sorted(cfg.items()))
|
||||||
self.scorer = scorer
|
self.scorer = scorer
|
||||||
|
@ -270,7 +274,8 @@ class Morphologizer(Tagger):
|
||||||
DOCS: https://spacy.io/api/morphologizer#get_loss
|
DOCS: https://spacy.io/api/morphologizer#get_loss
|
||||||
"""
|
"""
|
||||||
validate_examples(examples, "Morphologizer.get_loss")
|
validate_examples(examples, "Morphologizer.get_loss")
|
||||||
loss_func = SequenceCategoricalCrossentropy(names=self.labels, normalize=False)
|
loss_func = SequenceCategoricalCrossentropy(names=self.labels, normalize=False,
|
||||||
|
label_smoothing=self.cfg["label_smoothing"])
|
||||||
truths = []
|
truths = []
|
||||||
for eg in examples:
|
for eg in examples:
|
||||||
eg_truths = []
|
eg_truths = []
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
import pytest
|
import pytest
|
||||||
from numpy.testing import assert_equal
|
from numpy.testing import assert_equal, assert_almost_equal
|
||||||
|
|
||||||
from spacy import util
|
from spacy import util
|
||||||
from spacy.training import Example
|
from spacy.training import Example
|
||||||
|
@ -19,6 +19,8 @@ def test_label_types():
|
||||||
morphologizer.add_label(9)
|
morphologizer.add_label(9)
|
||||||
|
|
||||||
|
|
||||||
|
TAGS = ["Feat=N", "Feat=V", "Feat=J"]
|
||||||
|
|
||||||
TRAIN_DATA = [
|
TRAIN_DATA = [
|
||||||
(
|
(
|
||||||
"I like green eggs",
|
"I like green eggs",
|
||||||
|
@ -32,6 +34,29 @@ TRAIN_DATA = [
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
def test_label_smoothing():
|
||||||
|
nlp = Language()
|
||||||
|
morph_no_ls = nlp.add_pipe("morphologizer", "no_label_smoothing")
|
||||||
|
morph_ls = nlp.add_pipe(
|
||||||
|
"morphologizer", "label_smoothing", config=dict(label_smoothing=0.05)
|
||||||
|
)
|
||||||
|
train_examples = []
|
||||||
|
losses = {}
|
||||||
|
for tag in TAGS:
|
||||||
|
morph_no_ls.add_label(tag)
|
||||||
|
morph_ls.add_label(tag)
|
||||||
|
for t in TRAIN_DATA:
|
||||||
|
train_examples.append(Example.from_dict(nlp.make_doc(t[0]), t[1]))
|
||||||
|
|
||||||
|
nlp.initialize(get_examples=lambda: train_examples)
|
||||||
|
tag_scores, bp_tag_scores = morph_ls.model.begin_update(
|
||||||
|
[eg.predicted for eg in train_examples]
|
||||||
|
)
|
||||||
|
no_ls_grads = morph_no_ls.get_loss(train_examples, tag_scores)[1][0]
|
||||||
|
ls_grads = morph_ls.get_loss(train_examples, tag_scores)[1][0]
|
||||||
|
assert_almost_equal(ls_grads / no_ls_grads, 0.94285715)
|
||||||
|
|
||||||
|
|
||||||
def test_no_label():
|
def test_no_label():
|
||||||
nlp = Language()
|
nlp = Language()
|
||||||
nlp.add_pipe("morphologizer")
|
nlp.add_pipe("morphologizer")
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
import pytest
|
import pytest
|
||||||
from numpy.testing import assert_equal, assert_array_almost_equal
|
from numpy.testing import assert_equal, assert_almost_equal
|
||||||
from spacy.attrs import TAG
|
from spacy.attrs import TAG
|
||||||
|
|
||||||
from spacy import util
|
from spacy import util
|
||||||
|
@ -68,7 +68,6 @@ PARTIAL_DATA = [
|
||||||
|
|
||||||
|
|
||||||
def test_label_smoothing():
|
def test_label_smoothing():
|
||||||
util.fix_random_seed()
|
|
||||||
nlp = Language()
|
nlp = Language()
|
||||||
tagger_no_ls = nlp.add_pipe("tagger", "no_label_smoothing")
|
tagger_no_ls = nlp.add_pipe("tagger", "no_label_smoothing")
|
||||||
tagger_ls = nlp.add_pipe(
|
tagger_ls = nlp.add_pipe(
|
||||||
|
@ -88,7 +87,7 @@ def test_label_smoothing():
|
||||||
)
|
)
|
||||||
no_ls_grads = tagger_no_ls.get_loss(train_examples, tag_scores)[1][0]
|
no_ls_grads = tagger_no_ls.get_loss(train_examples, tag_scores)[1][0]
|
||||||
ls_grads = tagger_ls.get_loss(train_examples, tag_scores)[1][0]
|
ls_grads = tagger_ls.get_loss(train_examples, tag_scores)[1][0]
|
||||||
assert_array_almost_equal((ls_grads - no_ls_grads)[0], [0.05, -0.025, -0.025])
|
assert_almost_equal(ls_grads / no_ls_grads, 0.925)
|
||||||
|
|
||||||
|
|
||||||
def test_no_label():
|
def test_no_label():
|
||||||
|
|
|
@ -43,11 +43,12 @@ architectures and their arguments and hyperparameters.
|
||||||
> ```
|
> ```
|
||||||
|
|
||||||
| Setting | Description |
|
| Setting | Description |
|
||||||
| ---------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
| ---------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
| `model` | The model to use. Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~ |
|
| `model` | The model to use. Defaults to [Tagger](/api/architectures#Tagger). ~~Model[List[Doc], List[Floats2d]]~~ |
|
||||||
| `overwrite` <Tag variant="new">3.2</Tag> | Whether the values of existing features are overwritten. Defaults to `True`. ~~bool~~ |
|
| `overwrite` <Tag variant="new">3.2</Tag> | Whether the values of existing features are overwritten. Defaults to `True`. ~~bool~~ |
|
||||||
| `extend` <Tag variant="new">3.2</Tag> | Whether existing feature types (whose values may or may not be overwritten depending on `overwrite`) are preserved. Defaults to `False`. ~~bool~~ |
|
| `extend` <Tag variant="new">3.2</Tag> | Whether existing feature types (whose values may or may not be overwritten depending on `overwrite`) are preserved. Defaults to `False`. ~~bool~~ |
|
||||||
| `scorer` <Tag variant="new">3.2</Tag> | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"pos"` and `"morph"` and [`Scorer.score_token_attr_per_feat`](/api/scorer#score_token_attr_per_feat) for the attribute `"morph"`. ~~Optional[Callable]~~ |
|
| `scorer` <Tag variant="new">3.2</Tag> | The scoring method. Defaults to [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"pos"` and `"morph"` and [`Scorer.score_token_attr_per_feat`](/api/scorer#score_token_attr_per_feat) for the attribute `"morph"`. ~~Optional[Callable]~~ |
|
||||||
|
| `label_smoothing` <Tag variant="new">3.6</Tag> | The label smoothing factor used when computing the training loss. A value of `0.0` means no label smoothing is applied. Defaults to `0.0`. ~~float~~ |
|
||||||
|
|
||||||
```python
|
```python
|
||||||
%%GITHUB_SPACY/spacy/pipeline/morphologizer.pyx
|
%%GITHUB_SPACY/spacy/pipeline/morphologizer.pyx
|
||||||
|
|
Loading…
Reference in New Issue
Block a user