spaCy/spacy/scorer.py

# coding: utf8
from __future__ import division, print_function, unicode_literals

from .gold import tags_to_entities, GoldParse
from .errors import Errors


class PRFScore(object):
    """
    Running true-positive / false-positive / false-negative counters, with
    precision, recall and F-score derived from them on demand.
    """
    def __init__(self):
        # All three counters start empty; callers may also bump them directly.
        self.tp = self.fp = self.fn = 0

    def score_set(self, cand, gold):
        """
        Fold one comparison of predicted items against reference items into
        the counters.

        cand (set): The predicted items.
        gold (set): The reference items.
        """
        hits = cand.intersection(gold)
        self.tp += len(hits)
        spurious = cand - gold
        self.fp += len(spurious)
        missed = gold - cand
        self.fn += len(missed)

    @property
    def precision(self):
        """
        Share of predicted items that were correct (0.0 if nothing counted).
        """
        # The tiny constant keeps the division defined when both counts are 0.
        predicted = self.tp + self.fp + 1e-100
        return self.tp / predicted

    @property
    def recall(self):
        """
        Share of reference items that were recovered (0.0 if nothing counted).
        """
        # Same guard against an all-zero denominator as in `precision`.
        expected = self.tp + self.fn + 1e-100
        return self.tp / expected

    @property
    def fscore(self):
        """
        Harmonic mean of precision and recall (0.0 if both are zero).
        """
        prec = self.precision
        rec = self.recall
        ratio = (prec * rec) / (prec + rec + 1e-100)
        return 2 * ratio


class Scorer(object):
    """
    Accumulates evaluation counts over successive calls to `score()` and
    exposes them as percentages.

    Separate counters are kept for tokenization, tags, unlabelled and
    labelled dependencies, and named entities. A sentence-boundary counter
    (`sbd`) is created as well, but nothing in this class updates it.
    """
    def __init__(self, eval_punct=False):
        """
        Create a scorer with all counters at zero.

        eval_punct (bool): Stored on the instance as `eval_punct`; it is not
            read anywhere else in this class.
        """
        self.tokens = PRFScore()
        self.sbd = PRFScore()
        self.unlabelled = PRFScore()
        self.labelled = PRFScore()
        self.tags = PRFScore()
        self.ner = PRFScore()
        self.eval_punct = eval_punct

    @property
    def tags_acc(self):
        """
        Tag F-score, as a percentage.
        """
        return self.tags.fscore * 100

    @property
    def token_acc(self):
        """
        Tokenization precision, as a percentage.
        """
        return self.tokens.precision * 100

    @property
    def uas(self):
        """
        Unlabelled dependency F-score, as a percentage.
        """
        return self.unlabelled.fscore * 100

    @property
    def las(self):
        """
        Labelled dependency F-score, as a percentage.
        """
        return self.labelled.fscore * 100

    @property
    def ents_p(self):
        """
        Named entity precision, as a percentage.
        """
        return self.ner.precision * 100

    @property
    def ents_r(self):
        """
        Named entity recall, as a percentage.
        """
        return self.ner.recall * 100

    @property
    def ents_f(self):
        """
        Named entity F-score, as a percentage.
        """
        return self.ner.fscore * 100

    @property
    def scores(self):
        """
        All percentage scores in one dict, keyed by the property names.
        """
        metric_names = ('uas', 'las', 'ents_p', 'ents_r', 'ents_f',
                        'tags_acc', 'token_acc')
        return {name: getattr(self, name) for name in metric_names}

    def score(self, tokens, gold, verbose=False, punct_labels=('p', 'punct')):
        """
        Compare one predicted document against its gold annotation and add
        the outcome to the running counters.

        tokens: The predicted document. It is iterated for tokens (using
            their `orth_`, `i`, `tag_`, `dep_` and `head` attributes) and
            its `ents` are read for entity spans.
        gold: The gold annotation. `orig_annot` is read as a sequence of
            (id, word, tag, head, dep, ner) tuples, and `cand_to_gold` is
            indexed by predicted token position to obtain the aligned gold
            index, or None where there is no alignment.
        verbose (bool): Print wrongly predicted ('F') and missed ('M')
            dependencies.
        punct_labels (tuple): Lower-cased dependency labels that are left
            out of dependency scoring on both sides.
        RAISES (ValueError): If `tokens` and `gold` differ in length.
        """
        n_doc = len(tokens)
        n_gold = len(gold)
        if n_doc != n_gold:
            raise ValueError(
                Errors.E078.format(words_doc=n_doc, words_gold=n_gold))
        annots = gold.orig_annot
        cand_to_gold = gold.cand_to_gold

        # Gold side: collect tags, non-punctuation dependencies and entities.
        gold_deps = set()
        gold_tags = set()
        gold_ents = set(tags_to_entities([annot[-1] for annot in annots]))
        for gold_id, _word, gold_tag, gold_head_id, gold_dep, _ner in annots:
            gold_tags.add((gold_id, gold_tag))
            if gold_dep in (None, ""):
                # Missing dependency annotation: nothing to score against.
                continue
            gold_label = gold_dep.lower()
            if gold_label not in punct_labels:
                gold_deps.add((gold_id, gold_head_id, gold_label))

        # Candidate side: map each predicted token onto the gold indices.
        cand_deps = set()
        cand_tags = set()
        for token in tokens:
            text = token.orth_
            if text.isspace():
                # Whitespace tokens are not scored at all.
                continue
            aligned_i = cand_to_gold[token.i]
            if aligned_i is not None:
                self.tokens.tp += 1
                cand_tags.add((aligned_i, token.tag_))
            else:
                self.tokens.fp += 1
            cand_label = token.dep_.lower()
            if cand_label in punct_labels or not text.strip():
                continue
            aligned_head = cand_to_gold[token.head.i]
            if aligned_i is not None and aligned_head is not None:
                cand_deps.add((aligned_i, aligned_head, cand_label))
            else:
                # Unaligned arcs cannot go into the set: several of them
                # would collapse into a single (None, None) entry, so they
                # are counted as false positives directly.
                self.unlabelled.fp += 1
                self.labelled.fp += 1

        # Entities are only scored when no gold NER value is the '-' marker.
        if not any(annot[-1] == '-' for annot in annots):
            cand_ents = set()
            for ent in tokens.ents:
                first = cand_to_gold[ent.start]
                last = cand_to_gold[ent.end-1]
                if first is not None and last is not None:
                    cand_ents.add((ent.label_, first, last))
                else:
                    self.ner.fp += 1
            self.ner.score_set(cand_ents, gold_ents)

        self.tags.score_set(cand_tags, gold_tags)
        self.labelled.score_set(cand_deps, gold_deps)
        # Unlabelled arcs are the labelled ones with the label dropped.
        self.unlabelled.score_set(
            {arc[:2] for arc in cand_deps},
            {arc[:2] for arc in gold_deps},
        )

        if verbose:
            gold_words = [annot[1] for annot in annots]
            wrong_deps = cand_deps - gold_deps
            for w_id, h_id, dep in wrong_deps:
                print('F', gold_words[w_id], dep, gold_words[h_id])
            missed_deps = gold_deps - cand_deps
            for w_id, h_id, dep in missed_deps:
                print('M', gold_words[w_id], dep, gold_words[h_id])
Clean up imports, unused code, whitespace, docstrings 2017-04-15 13:05:47 +03:00			`# coding: utf8`
			`from __future__ import division, print_function, unicode_literals`
* Add scorer script 2015-03-11 04:07:03 +03:00
Revert "Merge branch 'develop' of https://github.com/explosion/spaCy into develop" This reverts commit c9ba3d3c2dc7067cf8bd55f878cec45a8c6d73d4, reversing changes made to 92c26a35d425d4e8ca1b805ea776ea10f5ded3df. 2018-03-27 20:23:02 +03:00			`from .gold import tags_to_entities, GoldParse`
💫 New system for error messages and warnings (#2163) * Add spacy.errors module * Update deprecation and user warnings * Replace errors and asserts with new error message system * Remove redundant asserts * Fix whitespace * Add messages for print/util.prints statements * Fix typo * Fix typos * Move CLI messages to spacy.cli._messages * Add decorator to display error code with message An implementation like this is nice because it only modifies the string when it's retrieved from the containing class – so we don't have to worry about manipulating tracebacks etc. * Remove unused link in spacy.about * Update errors for invalid pipeline components * Improve error for unknown factories * Add displaCy warnings * Update formatting consistency * Move error message to spacy.errors * Update errors and check if doc returned by component is None 2018-04-03 16:50:31 +03:00			`from .errors import Errors`
* Fix evaluation of NER in scorer.py 2015-05-27 04:18:16 +03:00
* Print parse if verbose in scorer 2015-04-05 23:29:30 +03:00
* Update spacy.scorer, to use P/R/F to support tokenization errors 2015-05-24 21:07:18 +03:00			`class PRFScore(object):`
Use consistent formatting for docstrings 2017-04-15 12:59:21 +03:00			`"""`
			`A precision / recall / F score`
			`"""`
* Update spacy.scorer, to use P/R/F to support tokenization errors 2015-05-24 21:07:18 +03:00			`def __init__(self):`
			`self.tp = 0`
			`self.fp = 0`
			`self.fn = 0`

			`def score_set(self, cand, gold):`
			`self.tp += len(cand.intersection(gold))`
			`self.fp += len(cand - gold)`
			`self.fn += len(gold - cand)`

			`@property`
			`def precision(self):`
			`return self.tp / (self.tp + self.fp + 1e-100)`

			`@property`
			`def recall(self):`
			`return self.tp / (self.tp + self.fn + 1e-100)`

			`@property`
			`def fscore(self):`
			`p = self.precision`
			`r = self.recall`
			`return 2 * ((p * r) / (p + r + 1e-100))`


* Add scorer script 2015-03-11 04:07:03 +03:00			`class Scorer(object):`
			`def __init__(self, eval_punct=False):`
* Update spacy.scorer, to use P/R/F to support tokenization errors 2015-05-24 21:07:18 +03:00			`self.tokens = PRFScore()`
			`self.sbd = PRFScore()`
			`self.unlabelled = PRFScore()`
			`self.labelled = PRFScore()`
			`self.tags = PRFScore()`
			`self.ner = PRFScore()`
* Add scorer script 2015-03-11 04:07:03 +03:00			`self.eval_punct = eval_punct`

			`@property`
			`def tags_acc(self):`
* Update spacy.scorer, to use P/R/F to support tokenization errors 2015-05-24 21:07:18 +03:00			`return self.tags.fscore * 100`
* Tmp commit. Working on whole document parsing 2015-05-24 03:49:56 +03:00
			`@property`
			`def token_acc(self):`
* Start scoring tokens 2015-06-28 07:21:38 +03:00			`return self.tokens.precision * 100`
* Add scorer script 2015-03-11 04:07:03 +03:00
			`@property`
			`def uas(self):`
* Update spacy.scorer, to use P/R/F to support tokenization errors 2015-05-24 21:07:18 +03:00			`return self.unlabelled.fscore * 100`
* Add scorer script 2015-03-11 04:07:03 +03:00
			`@property`
			`def las(self):`
* Update spacy.scorer, to use P/R/F to support tokenization errors 2015-05-24 21:07:18 +03:00			`return self.labelled.fscore * 100`
* Add scorer script 2015-03-11 04:07:03 +03:00
			`@property`
			`def ents_p(self):`
* Fix evaluation of NER in scorer.py 2015-05-27 04:18:16 +03:00			`return self.ner.precision * 100`
* Add scorer script 2015-03-11 04:07:03 +03:00
			`@property`
			`def ents_r(self):`
* Fix evaluation of NER in scorer.py 2015-05-27 04:18:16 +03:00			`return self.ner.recall * 100`
Remove trailing whitespace 2015-04-19 11:31:31 +03:00
* Add scorer script 2015-03-11 04:07:03 +03:00			`@property`
			`def ents_f(self):`
* Fix evaluation of NER in scorer.py 2015-05-27 04:18:16 +03:00			`return self.ner.fscore * 100`
* Add scorer script 2015-03-11 04:07:03 +03:00
Refactor training, with new spacy.train module. Defaults still a little awkward. 2016-10-09 13:24:24 +03:00			`@property`
			`def scores(self):`
			`return {`
Tidy up language, lemmatizer and scorer 2017-10-27 15:40:14 +03:00			`'uas': self.uas,`
			`'las': self.las,`
			`'ents_p': self.ents_p,`
			`'ents_r': self.ents_r,`
			`'ents_f': self.ents_f,`
Refactor training, with new spacy.train module. Defaults still a little awkward. 2016-10-09 13:24:24 +03:00			`'tags_acc': self.tags_acc,`
			`'token_acc': self.token_acc`
			`}`

* Accept punct_labels as an argument to the scorer 2016-02-03 00:59:06 +03:00			`def score(self, tokens, gold, verbose=False, punct_labels=('p', 'punct')):`
Revert "Merge branch 'develop' of https://github.com/explosion/spaCy into develop" This reverts commit c9ba3d3c2dc7067cf8bd55f878cec45a8c6d73d4, reversing changes made to 92c26a35d425d4e8ca1b805ea776ea10f5ded3df. 2018-03-27 20:23:02 +03:00			`if len(tokens) != len(gold):`
💫 New system for error messages and warnings (#2163) * Add spacy.errors module * Update deprecation and user warnings * Replace errors and asserts with new error message system * Remove redundant asserts * Fix whitespace * Add messages for print/util.prints statements * Fix typo * Fix typos * Move CLI messages to spacy.cli._messages * Add decorator to display error code with message An implementation like this is nice because it only modifies the string when it's retrieved from the containing class – so we don't have to worry about manipulating tracebacks etc. * Remove unused link in spacy.about * Update errors for invalid pipeline components * Improve error for unknown factories * Add displaCy warnings * Update formatting consistency * Move error message to spacy.errors * Update errors and check if doc returned by component is None 2018-04-03 16:50:31 +03:00			`raise ValueError(Errors.E078.format(words_doc=len(tokens), words_gold=len(gold)))`
* Update spacy.scorer, to use P/R/F to support tokenization errors 2015-05-24 21:07:18 +03:00			`gold_deps = set()`
			`gold_tags = set()`
Tidy up language, lemmatizer and scorer 2017-10-27 15:40:14 +03:00			`gold_ents = set(tags_to_entities([annot[-1]`
			`for annot in gold.orig_annot]))`
* Update spacy.scorer, to use P/R/F to support tokenization errors 2015-05-24 21:07:18 +03:00			`for id_, word, tag, head, dep, ner in gold.orig_annot:`
* Fix POS tag evaluation in scorer.py: do evaluate punctuation tags 2015-05-30 19:24:32 +03:00			`gold_tags.add((id_, tag))`
Fix scorer bug for NER, related to ambiguity between missing annotations and misaligned tokens 2017-03-16 17:38:28 +03:00			`if dep not in (None, "") and dep.lower() not in punct_labels:`
* Fix evaluation of NER in scorer.py 2015-05-27 04:18:16 +03:00			`gold_deps.add((id_, head, dep.lower()))`
* Update spacy.scorer, to use P/R/F to support tokenization errors 2015-05-24 21:07:18 +03:00			`cand_deps = set()`
			`cand_tags = set()`
			`for token in tokens:`
* Don't score whitespace tokens 2015-06-07 20:10:32 +03:00			`if token.orth_.isspace():`
			`continue`
* Fix POS tag evaluation in scorer.py: do evaluate punctuation tags 2015-05-30 19:24:32 +03:00			`gold_i = gold.cand_to_gold[token.i]`
			`if gold_i is None:`
Revert "Merge branch 'develop' of https://github.com/explosion/spaCy into develop" This reverts commit c9ba3d3c2dc7067cf8bd55f878cec45a8c6d73d4, reversing changes made to 92c26a35d425d4e8ca1b805ea776ea10f5ded3df. 2018-03-27 20:23:02 +03:00			`self.tokens.fp += 1`
* Fix POS tag evaluation in scorer.py: do evaluate punctuation tags 2015-05-30 19:24:32 +03:00			`else:`
* Start scoring tokens 2015-06-28 07:21:38 +03:00			`self.tokens.tp += 1`
* Fix POS tag evaluation in scorer.py: do evaluate punctuation tags 2015-05-30 19:24:32 +03:00			`cand_tags.add((gold_i, token.tag_))`
* Accept punct_labels as an argument to the scorer 2016-02-03 00:59:06 +03:00			`if token.dep_.lower() not in punct_labels and token.orth_.strip():`
* Update spacy.scorer, to use P/R/F to support tokenization errors 2015-05-24 21:07:18 +03:00			`gold_head = gold.cand_to_gold[token.head.i]`
			`# None is indistinct, so we can't just add it to the set`
			`# Multiple (None, None) deps are possible`
			`if gold_i is None or gold_head is None:`
			`self.unlabelled.fp += 1`
			`self.labelled.fp += 1`
			`else:`
* Fix evaluation of NER in scorer.py 2015-05-27 04:18:16 +03:00			`cand_deps.add((gold_i, gold_head, token.dep_.lower()))`
* Avoid NER scoring for sentences with some missing NER values. 2015-05-28 23:39:08 +03:00			`if '-' not in [token[-1] for token in gold.orig_annot]:`
			`cand_ents = set()`
			`for ent in tokens.ents:`
			`first = gold.cand_to_gold[ent.start]`
			`last = gold.cand_to_gold[ent.end-1]`
			`if first is None or last is None:`
			`self.ner.fp += 1`
			`else:`
			`cand_ents.add((ent.label_, first, last))`
			`self.ner.score_set(cand_ents, gold_ents)`
* Fix evaluation of NER in scorer.py 2015-05-27 04:18:16 +03:00			`self.tags.score_set(cand_tags, gold_tags)`
* Update spacy.scorer, to use P/R/F to support tokenization errors 2015-05-24 21:07:18 +03:00			`self.labelled.score_set(cand_deps, gold_deps)`
			`self.unlabelled.score_set(`
			`set(item[:2] for item in cand_deps),`
			`set(item[:2] for item in gold_deps),`
			`)`
* Add verbose printing to scorer 2015-06-14 18:45:50 +03:00			`if verbose:`
			`gold_words = [item[1] for item in gold.orig_annot]`
			`for w_id, h_id, dep in (cand_deps - gold_deps):`
* Fix training under python3 2015-07-28 15:09:30 +03:00			`print('F', gold_words[w_id], dep, gold_words[h_id])`
* Add verbose printing to scorer 2015-06-14 18:45:50 +03:00			`for w_id, h_id, dep in (gold_deps - cand_deps):`
* Fix training under python3 2015-07-28 15:09:30 +03:00			`print('M', gold_words[w_id], dep, gold_words[h_id])`