mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-24 17:06:29 +03:00
Add AttributeRuler.score (#5963)
* Add AttributeRuler.score Add scoring for TAG / POS / MORPH / LEMMA if these are present in the assigned token attributes. Add default score weights (that don't really make a lot of sense) so that the scores are in the default config in some form. * Update docs
This commit is contained in:
parent
79d460e3a2
commit
90d88729e0
|
@ -4,9 +4,11 @@ from pathlib import Path
|
||||||
|
|
||||||
from .pipe import Pipe
|
from .pipe import Pipe
|
||||||
from ..errors import Errors
|
from ..errors import Errors
|
||||||
|
from ..gold import validate_examples
|
||||||
from ..language import Language
|
from ..language import Language
|
||||||
from ..matcher import Matcher
|
from ..matcher import Matcher
|
||||||
from ..symbols import IDS
|
from ..scorer import Scorer
|
||||||
|
from ..symbols import IDS, TAG, POS, MORPH, LEMMA
|
||||||
from ..tokens import Doc, Span
|
from ..tokens import Doc, Span
|
||||||
from ..tokens._retokenize import normalize_token_attrs, set_token_attrs
|
from ..tokens._retokenize import normalize_token_attrs, set_token_attrs
|
||||||
from ..vocab import Vocab
|
from ..vocab import Vocab
|
||||||
|
@ -192,6 +194,32 @@ class AttributeRuler(Pipe):
|
||||||
all_patterns.append(p)
|
all_patterns.append(p)
|
||||||
return all_patterns
|
return all_patterns
|
||||||
|
|
||||||
|
def score(self, examples, **kwargs):
    """Score a batch of examples.

    Only attributes that appear among the keys of the entries in
    self.attrs are scored; all other attributes are ignored.

    examples (Iterable[Example]): The examples to score.
    **kwargs: Passed through unchanged to Scorer.score_token_attr.
    RETURNS (Dict[str, Any]): The scores, produced by
        Scorer.score_token_attr for the attributes "tag", "pos", "morph"
        and "lemma" if present in any of the target token attributes.

    DOCS: https://spacy.io/api/attributeruler#score
    """
    validate_examples(examples, "AttributeRuler.score")
    # Attribute IDs that can be scored, mapped to the attribute name that
    # Scorer.score_token_attr expects.
    scorable = {TAG: "tag", POS: "pos", MORPH: "morph", LEMMA: "lemma"}
    # Collect every attribute key used by any entry of self.attrs.
    assigned = set()
    for token_attrs in self.attrs:
        assigned.update(token_attrs)
    results = {}
    for attr in assigned:
        name = scorable.get(attr)
        if name is not None:
            results.update(Scorer.score_token_attr(examples, name, **kwargs))
    return results
|
||||||
|
|
||||||
def to_bytes(self, exclude: Iterable[str] = tuple()) -> bytes:
|
def to_bytes(self, exclude: Iterable[str] = tuple()) -> bytes:
|
||||||
"""Serialize the AttributeRuler to a bytestring.
|
"""Serialize the AttributeRuler to a bytestring.
|
||||||
|
|
||||||
|
|
|
@ -307,7 +307,7 @@ class Tagger(Pipe):
|
||||||
|
|
||||||
examples (Iterable[Example]): The examples to score.
|
examples (Iterable[Example]): The examples to score.
|
||||||
RETURNS (Dict[str, Any]): The scores, produced by
|
RETURNS (Dict[str, Any]): The scores, produced by
|
||||||
Scorer.score_token_attr for the attributes "tag", "pos" and "lemma".
|
Scorer.score_token_attr for the attribute "tag".
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/tagger#score
|
DOCS: https://spacy.io/api/tagger#score
|
||||||
"""
|
"""
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
import pytest
|
import pytest
|
||||||
import numpy
|
import numpy
|
||||||
|
from spacy.gold import Example
|
||||||
from spacy.lang.en import English
|
from spacy.lang.en import English
|
||||||
from spacy.pipeline import AttributeRuler
|
from spacy.pipeline import AttributeRuler
|
||||||
from spacy import util, registry
|
from spacy import util, registry
|
||||||
|
@ -94,6 +95,23 @@ def test_attributeruler_init_patterns(nlp, pattern_dicts):
|
||||||
assert doc[3].morph_ == "Case=Nom|Number=Sing"
|
assert doc[3].morph_ == "Case=Nom|Number=Sing"
|
||||||
|
|
||||||
|
|
||||||
|
def test_attributeruler_score(nlp, pattern_dicts):
    """Check the lemma and morph accuracies reported via nlp.evaluate."""
    # set up the ruler from the pattern fixtures
    nlp.add_pipe("attribute_ruler", config={"pattern_dicts": pattern_dicts})
    text = "This is a test."
    predicted = nlp(text)
    # token index -> (lemma, morph) expected after the ruler has run
    expected = {
        2: ("the", "Case=Nom|Number=Plur"),
        3: ("cat", "Case=Nom|Number=Sing"),
    }
    for index, (lemma, morph) in expected.items():
        assert predicted[index].lemma_ == lemma
        assert predicted[index].morph_ == morph

    gold = {"lemmas": ["this", "is", "a", "cat", "."]}
    dev_examples = [Example.from_dict(nlp.make_doc(text), gold)]
    scores = nlp.evaluate(dev_examples)
    # of the five gold lemmas, only "cat" is matched
    assert scores["lemma_acc"] == pytest.approx(0.2)
    # the empty morphs are counted as correct
    assert scores["morph_acc"] == pytest.approx(0.6)
|
||||||
|
|
||||||
|
|
||||||
def test_attributeruler_tag_map(nlp, tag_map):
|
def test_attributeruler_tag_map(nlp, tag_map):
|
||||||
a = AttributeRuler(nlp.vocab)
|
a = AttributeRuler(nlp.vocab)
|
||||||
a.load_from_tag_map(tag_map)
|
a.load_from_tag_map(tag_map)
|
||||||
|
|
|
@ -138,6 +138,21 @@ Get all patterns that have been added to the attribute ruler in the
|
||||||
| ----------- | -------------------------------------------------------------------------------------------- |
|
| ----------- | -------------------------------------------------------------------------------------------- |
|
||||||
| **RETURNS** | The patterns added to the attribute ruler. ~~List[Dict[str, Union[List[dict], dict, int]]]~~ |
|
| **RETURNS** | The patterns added to the attribute ruler. ~~List[Dict[str, Union[List[dict], dict, int]]]~~ |
|
||||||
|
|
||||||
|
## AttributeRuler.score {#score tag="method" new="3"}
|
||||||
|
|
||||||
|
Score a batch of examples.
|
||||||
|
|
||||||
|
> #### Example
|
||||||
|
>
|
||||||
|
> ```python
|
||||||
|
> scores = attribute_ruler.score(examples)
|
||||||
|
> ```
|
||||||
|
|
||||||
|
| Name | Description |
|
||||||
|
| ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
|
| `examples` | The examples to score. ~~Iterable[Example]~~ |
|
||||||
|
| **RETURNS** | The scores, produced by [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"tag"`, `"pos"`, `"morph"` and `"lemma"` if present in any of the target token attributes. ~~Dict[str, float]~~ |
|
||||||
|
|
||||||
## AttributeRuler.load_from_tag_map {#load_from_tag_map tag="method"}
|
## AttributeRuler.load_from_tag_map {#load_from_tag_map tag="method"}
|
||||||
|
|
||||||
Load attribute ruler patterns from a tag map.
|
Load attribute ruler patterns from a tag map.
|
||||||
|
|
|
@ -252,7 +252,7 @@ Score a batch of examples.
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
| ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
| ----------- | --------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
| `examples` | The examples to score. ~~Iterable[Example]~~ |
|
| `examples` | The examples to score. ~~Iterable[Example]~~ |
|
||||||
| **RETURNS** | The scores, produced by [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attributes `"pos"`, `"tag"` and `"lemma"`. ~~Dict[str, float]~~ |
|
| **RETURNS** | The scores, produced by [`Scorer.score_token_attr`](/api/scorer#score_token_attr) for the attribute `"tag"`. ~~Dict[str, float]~~ |
|
||||||
|
|
||||||
## Tagger.create_optimizer {#create_optimizer tag="method"}
|
## Tagger.create_optimizer {#create_optimizer tag="method"}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user