mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 10:16:27 +03:00
Add micro PRF for morph scoring (#9546)
* Add micro PRF for morph scoring. For pipelines where morph features are added by more than one component and a reference training corpus may not contain all features, a micro PRF score is more flexible than a simple accuracy score. An example is the reading and inflection features added by the Japanese tokenizer.
* Use `morph_micro_f` as the default morph score for Japanese morphologizers.
* Update docstring
* Fix typo in docstring
* Update Scorer API docs
* Fix results type
* Organize score list by attribute prefix
This commit is contained in:
parent
554fa414ec
commit
12974bf4d9
|
@ -203,7 +203,7 @@ class Japanese(Language):
|
||||||
"extend": True,
|
"extend": True,
|
||||||
"scorer": {"@scorers": "spacy.morphologizer_scorer.v1"},
|
"scorer": {"@scorers": "spacy.morphologizer_scorer.v1"},
|
||||||
},
|
},
|
||||||
default_score_weights={"pos_acc": 0.5, "morph_acc": 0.5, "morph_per_feat": None},
|
default_score_weights={"pos_acc": 0.5, "morph_micro_f": 0.5, "morph_per_feat": None},
|
||||||
)
|
)
|
||||||
def make_morphologizer(
|
def make_morphologizer(
|
||||||
nlp: Language,
|
nlp: Language,
|
||||||
|
|
|
@ -247,18 +247,21 @@ class Scorer:
|
||||||
missing_values: Set[Any] = MISSING_VALUES, # type: ignore[assignment]
|
missing_values: Set[Any] = MISSING_VALUES, # type: ignore[assignment]
|
||||||
**cfg,
|
**cfg,
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""Return PRF scores per feat for a token attribute in UFEATS format.
|
"""Return micro PRF and PRF scores per feat for a token attribute in
|
||||||
|
UFEATS format.
|
||||||
|
|
||||||
examples (Iterable[Example]): Examples to score
|
examples (Iterable[Example]): Examples to score
|
||||||
attr (str): The attribute to score.
|
attr (str): The attribute to score.
|
||||||
getter (Callable[[Token, str], Any]): Defaults to getattr. If provided,
|
getter (Callable[[Token, str], Any]): Defaults to getattr. If provided,
|
||||||
getter(token, attr) should return the value of the attribute for an
|
getter(token, attr) should return the value of the attribute for an
|
||||||
individual token.
|
individual token.
|
||||||
missing_values (Set[Any]): Attribute values to treat as missing annotation
|
missing_values (Set[Any]): Attribute values to treat as missing
|
||||||
in the reference annotation.
|
annotation in the reference annotation.
|
||||||
RETURNS (dict): A dictionary containing the per-feat PRF scores under
|
RETURNS (dict): A dictionary containing the micro PRF scores under the
|
||||||
the key attr_per_feat.
|
key attr_micro_p/r/f and the per-feat PRF scores under
|
||||||
|
attr_per_feat.
|
||||||
"""
|
"""
|
||||||
|
micro_score = PRFScore()
|
||||||
per_feat = {}
|
per_feat = {}
|
||||||
for example in examples:
|
for example in examples:
|
||||||
pred_doc = example.predicted
|
pred_doc = example.predicted
|
||||||
|
@ -300,15 +303,22 @@ class Scorer:
|
||||||
pred_per_feat[field] = set()
|
pred_per_feat[field] = set()
|
||||||
pred_per_feat[field].add((gold_i, feat))
|
pred_per_feat[field].add((gold_i, feat))
|
||||||
for field in per_feat:
|
for field in per_feat:
|
||||||
|
micro_score.score_set(pred_per_feat.get(field, set()), gold_per_feat.get(field, set()))
|
||||||
per_feat[field].score_set(
|
per_feat[field].score_set(
|
||||||
pred_per_feat.get(field, set()), gold_per_feat.get(field, set())
|
pred_per_feat.get(field, set()), gold_per_feat.get(field, set())
|
||||||
)
|
)
|
||||||
score_key = f"{attr}_per_feat"
|
result: Dict[str, Any] = {}
|
||||||
if any([len(v) for v in per_feat.values()]):
|
if len(micro_score) > 0:
|
||||||
result = {k: v.to_dict() for k, v in per_feat.items()}
|
result[f"{attr}_micro_p"] = micro_score.precision
|
||||||
return {score_key: result}
|
result[f"{attr}_micro_r"] = micro_score.recall
|
||||||
|
result[f"{attr}_micro_f"] = micro_score.fscore
|
||||||
|
result[f"{attr}_per_feat"] = {k: v.to_dict() for k, v in per_feat.items()}
|
||||||
else:
|
else:
|
||||||
return {score_key: None}
|
result[f"{attr}_micro_p"] = None
|
||||||
|
result[f"{attr}_micro_r"] = None
|
||||||
|
result[f"{attr}_micro_f"] = None
|
||||||
|
result[f"{attr}_per_feat"] = None
|
||||||
|
return result
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def score_spans(
|
def score_spans(
|
||||||
|
|
|
@ -249,6 +249,7 @@ def test_tag_score(tagged_doc):
|
||||||
assert results["tag_acc"] == 1.0
|
assert results["tag_acc"] == 1.0
|
||||||
assert results["pos_acc"] == 1.0
|
assert results["pos_acc"] == 1.0
|
||||||
assert results["morph_acc"] == 1.0
|
assert results["morph_acc"] == 1.0
|
||||||
|
assert results["morph_micro_f"] == 1.0
|
||||||
assert results["morph_per_feat"]["NounType"]["f"] == 1.0
|
assert results["morph_per_feat"]["NounType"]["f"] == 1.0
|
||||||
|
|
||||||
# Gold annotation is modified
|
# Gold annotation is modified
|
||||||
|
@ -272,6 +273,7 @@ def test_tag_score(tagged_doc):
|
||||||
assert results["tag_acc"] == 0.9
|
assert results["tag_acc"] == 0.9
|
||||||
assert results["pos_acc"] == 0.9
|
assert results["pos_acc"] == 0.9
|
||||||
assert results["morph_acc"] == approx(0.8)
|
assert results["morph_acc"] == approx(0.8)
|
||||||
|
assert results["morph_micro_f"] == approx(0.8461538)
|
||||||
assert results["morph_per_feat"]["NounType"]["f"] == 1.0
|
assert results["morph_per_feat"]["NounType"]["f"] == 1.0
|
||||||
assert results["morph_per_feat"]["Poss"]["f"] == 0.0
|
assert results["morph_per_feat"]["Poss"]["f"] == 0.0
|
||||||
assert results["morph_per_feat"]["Number"]["f"] == approx(0.72727272)
|
assert results["morph_per_feat"]["Number"]["f"] == approx(0.72727272)
|
||||||
|
|
|
@ -41,15 +41,20 @@ Calculate the scores for a list of [`Example`](/api/example) objects using the
|
||||||
scoring methods provided by the components in the pipeline.
|
scoring methods provided by the components in the pipeline.
|
||||||
|
|
||||||
The returned `Dict` contains the scores provided by the individual pipeline
|
The returned `Dict` contains the scores provided by the individual pipeline
|
||||||
components. For the scoring methods provided by the `Scorer` and use by the core
|
components. For the scoring methods provided by the `Scorer` and used by the
|
||||||
pipeline components, the individual score names start with the `Token` or `Doc`
|
core pipeline components, the individual score names start with the `Token` or
|
||||||
attribute being scored:
|
`Doc` attribute being scored:
|
||||||
|
|
||||||
- `token_acc`, `token_p`, `token_r`, `token_f`,
|
- `token_acc`, `token_p`, `token_r`, `token_f`
|
||||||
- `sents_p`, `sents_r`, `sents_f`
|
- `sents_p`, `sents_r`, `sents_f`
|
||||||
- `tag_acc`, `pos_acc`, `morph_acc`, `morph_per_feat`, `lemma_acc`
|
- `tag_acc`
|
||||||
|
- `pos_acc`
|
||||||
|
- `morph_acc`, `morph_micro_p`, `morph_micro_r`, `morph_micro_f`,
|
||||||
|
`morph_per_feat`
|
||||||
|
- `lemma_acc`
|
||||||
- `dep_uas`, `dep_las`, `dep_las_per_type`
|
- `dep_uas`, `dep_las`, `dep_las_per_type`
|
||||||
- `ents_p`, `ents_r`, `ents_f`, `ents_per_type`
|
- `ents_p`, `ents_r`, `ents_f`, `ents_per_type`
|
||||||
|
- `spans_sc_p`, `spans_sc_r`, `spans_sc_f`
|
||||||
- `cats_score` (depends on config, description provided in `cats_score_desc`),
|
- `cats_score` (depends on config, description provided in `cats_score_desc`),
|
||||||
`cats_micro_p`, `cats_micro_r`, `cats_micro_f`, `cats_macro_p`,
|
`cats_micro_p`, `cats_micro_r`, `cats_micro_f`, `cats_macro_p`,
|
||||||
`cats_macro_r`, `cats_macro_f`, `cats_macro_auc`, `cats_f_per_type`,
|
`cats_macro_r`, `cats_macro_f`, `cats_macro_auc`, `cats_f_per_type`,
|
||||||
|
@ -84,7 +89,7 @@ Docs with `has_unknown_spaces` are skipped during scoring.
|
||||||
> ```
|
> ```
|
||||||
|
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
| ----------- | ------------------------------------------------------------------------------------------------------------------- |
|
| ----------- | ------------------------------------------------------------------------------------------------------------------- |
|
||||||
| `examples` | The `Example` objects holding both the predictions and the correct gold-standard annotations. ~~Iterable[Example]~~ |
|
| `examples` | The `Example` objects holding both the predictions and the correct gold-standard annotations. ~~Iterable[Example]~~ |
|
||||||
| **RETURNS** | A dictionary containing the scores `token_acc`, `token_p`, `token_r`, `token_f`. ~~Dict[str, float]~~ |
|
| **RETURNS** | A dictionary containing the scores `token_acc`, `token_p`, `token_r`, `token_f`. ~~Dict[str, float]~~ |
|
||||||
|
|
||||||
|
@ -125,13 +130,13 @@ scoring.
|
||||||
> ```
|
> ```
|
||||||
|
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
| ---------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
| ---------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
| `examples` | The `Example` objects holding both the predictions and the correct gold-standard annotations. ~~Iterable[Example]~~ |
|
| `examples` | The `Example` objects holding both the predictions and the correct gold-standard annotations. ~~Iterable[Example]~~ |
|
||||||
| `attr` | The attribute to score. ~~str~~ |
|
| `attr` | The attribute to score. ~~str~~ |
|
||||||
| _keyword-only_ | |
|
| _keyword-only_ | |
|
||||||
| `getter` | Defaults to `getattr`. If provided, `getter(token, attr)` should return the value of the attribute for an individual `Token`. ~~Callable[[Token, str], Any]~~ |
|
| `getter` | Defaults to `getattr`. If provided, `getter(token, attr)` should return the value of the attribute for an individual `Token`. ~~Callable[[Token, str], Any]~~ |
|
||||||
| `missing_values` | Attribute values to treat as missing annotation in the reference annotation. Defaults to `{0, None, ""}`. ~~Set[Any]~~ |
|
| `missing_values` | Attribute values to treat as missing annotation in the reference annotation. Defaults to `{0, None, ""}`. ~~Set[Any]~~ |
|
||||||
| **RETURNS** | A dictionary containing the per-feature PRF scores under the key `{attr}_per_feat`. ~~Dict[str, Dict[str, float]]~~ |
|
| **RETURNS** | A dictionary containing the micro PRF scores under the keys `{attr}_micro_p`, `{attr}_micro_r` and `{attr}_micro_f`, and the per-feature PRF scores under `{attr}_per_feat`. ~~Dict[str, Any]~~ |
|
||||||
|
|
||||||
## Scorer.score_spans {#score_spans tag="staticmethod" new="3"}
|
## Scorer.score_spans {#score_spans tag="staticmethod" new="3"}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user