mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 10:16:27 +03:00
Update Scorer and add API docs
This commit is contained in:
parent
4d550a3055
commit
b78a8dc1d2
|
@ -35,7 +35,17 @@ class PRFScore(object):
|
|||
|
||||
|
||||
class Scorer(object):
|
||||
"""Compute evaluation scores."""
|
||||
|
||||
def __init__(self, eval_punct=False):
|
||||
"""Initialize the Scorer.
|
||||
|
||||
eval_punct (bool): Evaluate the dependency attachments to and from
|
||||
punctuation.
|
||||
RETURNS (Scorer): The newly created object.
|
||||
|
||||
DOCS: https://spacy.io/api/scorer#init
|
||||
"""
|
||||
self.tokens = PRFScore()
|
||||
self.sbd = PRFScore()
|
||||
self.unlabelled = PRFScore()
|
||||
|
@ -46,34 +56,46 @@ class Scorer(object):
|
|||
|
||||
@property
|
||||
def tags_acc(self):
|
||||
"""RETURNS (float): Part-of-speech tag accuracy (fine grained tags,
|
||||
i.e. `Token.tag`).
|
||||
"""
|
||||
return self.tags.fscore * 100
|
||||
|
||||
@property
|
||||
def token_acc(self):
|
||||
"""RETURNS (float): Tokenization accuracy."""
|
||||
return self.tokens.precision * 100
|
||||
|
||||
@property
|
||||
def uas(self):
|
||||
"""RETURNS (float): Unlabelled dependency score."""
|
||||
return self.unlabelled.fscore * 100
|
||||
|
||||
@property
|
||||
def las(self):
|
||||
"""RETURNS (float): Labelled dependency score."""
|
||||
return self.labelled.fscore * 100
|
||||
|
||||
@property
|
||||
def ents_p(self):
|
||||
"""RETURNS (float): Named entity accuracy (precision)."""
|
||||
return self.ner.precision * 100
|
||||
|
||||
@property
|
||||
def ents_r(self):
|
||||
"""RETURNS (float): Named entity accuracy (recall)."""
|
||||
return self.ner.recall * 100
|
||||
|
||||
@property
|
||||
def ents_f(self):
|
||||
"""RETURNS (float): Named entity accuracy (F-score)."""
|
||||
return self.ner.fscore * 100
|
||||
|
||||
@property
|
||||
def scores(self):
|
||||
"""RETURNS (dict): All scores with keys `uas`, `las`, `ents_p`,
|
||||
`ents_r`, `ents_f`, `tags_acc` and `token_acc`.
|
||||
"""
|
||||
return {
|
||||
"uas": self.uas,
|
||||
"las": self.las,
|
||||
|
@ -84,9 +106,20 @@ class Scorer(object):
|
|||
"token_acc": self.token_acc,
|
||||
}
|
||||
|
||||
def score(self, tokens, gold, verbose=False, punct_labels=("p", "punct")):
|
||||
if len(tokens) != len(gold):
|
||||
gold = GoldParse.from_annot_tuples(tokens, zip(*gold.orig_annot))
|
||||
def score(self, doc, gold, verbose=False, punct_labels=("p", "punct")):
|
||||
"""Update the evaluation scores from a single Doc / GoldParse pair.
|
||||
|
||||
doc (Doc): The predicted annotations.
|
||||
gold (GoldParse): The correct annotations.
|
||||
verbose (bool): Print debugging information.
|
||||
punct_labels (tuple): Dependency labels for punctuation. Used to
|
||||
evaluate dependency attachments to punctuation if `eval_punct` is
|
||||
`True`.
|
||||
|
||||
DOCS: https://spacy.io/api/scorer#score
|
||||
"""
|
||||
if len(doc) != len(gold):
|
||||
gold = GoldParse.from_annot_tuples(doc, zip(*gold.orig_annot))
|
||||
gold_deps = set()
|
||||
gold_tags = set()
|
||||
gold_ents = set(tags_to_entities([annot[-1] for annot in gold.orig_annot]))
|
||||
|
@ -96,7 +129,7 @@ class Scorer(object):
|
|||
gold_deps.add((id_, head, dep.lower()))
|
||||
cand_deps = set()
|
||||
cand_tags = set()
|
||||
for token in tokens:
|
||||
for token in doc:
|
||||
if token.orth_.isspace():
|
||||
continue
|
||||
gold_i = gold.cand_to_gold[token.i]
|
||||
|
@ -116,7 +149,7 @@ class Scorer(object):
|
|||
cand_deps.add((gold_i, gold_head, token.dep_.lower()))
|
||||
if "-" not in [token[-1] for token in gold.orig_annot]:
|
||||
cand_ents = set()
|
||||
for ent in tokens.ents:
|
||||
for ent in doc.ents:
|
||||
first = gold.cand_to_gold[ent.start]
|
||||
last = gold.cand_to_gold[ent.end - 1]
|
||||
if first is None or last is None:
|
||||
|
|
58
website/docs/api/scorer.md
Normal file
58
website/docs/api/scorer.md
Normal file
|
@ -0,0 +1,58 @@
|
|||
---
|
||||
title: Scorer
|
||||
teaser: Compute evaluation scores
|
||||
tag: class
|
||||
source: spacy/scorer.py
|
||||
---
|
||||
|
||||
The `Scorer` computes and stores evaluation scores. It's typically created by
|
||||
[`Language.evaluate`](/api/language#evaluate).
|
||||
|
||||
## Scorer.\_\_init\_\_ {#init tag="method"}
|
||||
|
||||
Create a new `Scorer`.
|
||||
|
||||
> #### Example
|
||||
>
|
||||
> ```python
|
||||
> from spacy.scorer import Scorer
|
||||
>
|
||||
> scorer = Scorer()
|
||||
> ```
|
||||
|
||||
| Name | Type | Description |
|
||||
| ------------ | -------- | ------------------------------------------------------------ |
|
||||
| `eval_punct` | bool | Evaluate the dependency attachments to and from punctuation. |
|
||||
| **RETURNS** | `Scorer` | The newly created object. |
|
||||
|
||||
## Scorer.score {#score tag="method"}
|
||||
|
||||
Update the evaluation scores from a single [`Doc`](/api/doc) /
|
||||
[`GoldParse`](/api/goldparse) pair.
|
||||
|
||||
> #### Example
|
||||
>
|
||||
> ```python
|
||||
> scorer = Scorer()
|
||||
> scorer.score(doc, gold)
|
||||
> ```
|
||||
|
||||
| Name | Type | Description |
|
||||
| -------------- | ----------- | -------------------------------------------------------------------------------------------------------------------- |
|
||||
| `doc` | `Doc` | The predicted annotations. |
|
||||
| `gold` | `GoldParse` | The correct annotations. |
|
||||
| `verbose` | bool | Print debugging information. |
|
||||
| `punct_labels` | tuple | Dependency labels for punctuation. Used to evaluate dependency attachments to punctuation if `eval_punct` is `True`. |
|
||||
|
||||
## Properties
|
||||
|
||||
| Name | Type | Description |
|
||||
| ----------- | ----- | -------------------------------------------------------------------------------------------- |
|
||||
| `token_acc` | float | Tokenization accuracy. |
|
||||
| `tags_acc` | float | Part-of-speech tag accuracy (fine grained tags, i.e. `Token.tag`). |
|
||||
| `uas` | float | Unlabelled dependency score. |
|
||||
| `las` | float | Labelled dependency score. |
|
||||
| `ents_p` | float | Named entity accuracy (precision). |
|
||||
| `ents_r` | float | Named entity accuracy (recall). |
|
||||
| `ents_f` | float | Named entity accuracy (F-score). |
|
||||
| `scores` | dict | All scores with keys `uas`, `las`, `ents_p`, `ents_r`, `ents_f`, `tags_acc` and `token_acc`. |
|
|
@ -90,7 +90,8 @@
|
|||
{ "text": "StringStore", "url": "/api/stringstore" },
|
||||
{ "text": "Vectors", "url": "/api/vectors" },
|
||||
{ "text": "GoldParse", "url": "/api/goldparse" },
|
||||
{ "text": "GoldCorpus", "url": "/api/goldcorpus" }
|
||||
{ "text": "GoldCorpus", "url": "/api/goldcorpus" },
|
||||
{ "text": "Scorer", "url": "/api/scorer" }
|
||||
]
|
||||
},
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue
Block a user