# spaCy/spacy/scorer.py

from __future__ import division

from .gold import tags_to_entities


class PRFScore(object):
    """Running tp / fp / fn tallies with derived precision, recall and F."""
    def __init__(self):
        # All three tallies start empty and are only ever incremented.
        self.tp = self.fp = self.fn = 0

    def score_set(self, cand, gold):
        """Add the overlap between two sets to the running tallies.

        Items in both sets count as true positives, items only in `cand` as
        false positives, and items only in `gold` as false negatives.
        """
        matched = cand.intersection(gold)
        self.tp += len(matched)
        spurious = cand - gold
        self.fp += len(spurious)
        missed = gold - cand
        self.fn += len(missed)

    @property
    def precision(self):
        """tp / (tp + fp); the tiny epsilon yields 0.0 when both are zero."""
        predicted = self.tp + self.fp + 1e-100
        return self.tp / predicted

    @property
    def recall(self):
        """tp / (tp + fn); the tiny epsilon yields 0.0 when both are zero."""
        expected = self.tp + self.fn + 1e-100
        return self.tp / expected

    @property
    def fscore(self):
        """Harmonic mean of precision and recall, guarded against 0 / 0."""
        precision, recall = self.precision, self.recall
        half_f = (precision * recall) / (precision + recall + 1e-100)
        return 2 * half_f


class Scorer(object):
    """Accumulate tag, dependency and entity scores over analysed documents.

    Every call to score() compares one candidate analysis against its gold
    annotation and adds the result to per-task PRFScore counters. The
    properties report the running totals scaled to percentages.
    """
    def __init__(self, eval_punct=False):
        # NOTE(review): `tokens` and `sbd` are created here but score() never
        # updates them, so token_acc stays at 0 unless a caller fills them in.
        self.tokens = PRFScore()
        self.sbd = PRFScore()
        self.unlabelled = PRFScore()
        self.labelled = PRFScore()
        self.tags = PRFScore()
        self.ner = PRFScore()
        # NOTE(review): stored but never read; score() always skips
        # dependencies labelled 'p' / 'punct', whatever this flag says.
        self.eval_punct = eval_punct

    @property
    def tags_acc(self):
        """F-score of the tag counters, as a percentage."""
        return self.tags.fscore * 100

    @property
    def token_acc(self):
        """F-score of the token counters, as a percentage."""
        return self.tokens.fscore * 100

    @property
    def uas(self):
        """F-score over unlabelled (word, head) pairs, as a percentage."""
        return self.unlabelled.fscore * 100

    @property
    def las(self):
        """F-score over labelled (word, head, dep) triples, as a percentage."""
        return self.labelled.fscore * 100

    @property
    def ents_p(self):
        """Entity precision, as a percentage."""
        return self.ner.precision * 100

    @property
    def ents_r(self):
        """Entity recall, as a percentage."""
        return self.ner.recall * 100

    @property
    def ents_f(self):
        """Entity F-score, as a percentage."""
        return self.ner.fscore * 100

    def score(self, tokens, gold, verbose=False):
        """Add one candidate analysis to the running counters.

        tokens: the candidate analysis. Iterating it yields items exposing
            `i`, `orth_`, `tag_`, `dep_` and `head`; its `ents` yields spans
            exposing `start`, `end` and `label_`.
        gold: the reference annotation. `orig_annot` holds
            (id, word, tag, head, dep, ner) tuples; `cand_to_gold` maps a
            candidate token index to a gold index, or to None when the token
            has no gold counterpart (presumably a tokenization mismatch).
        verbose: when true, print each spurious ('F') and each missed ('M')
            labelled dependency as "<flag> <word> <dep> <head word>".
        """
        assert len(tokens) == len(gold)

        punct_deps = ('p', 'punct')

        gold_deps = set()
        gold_tags = set()
        gold_ents = set(tags_to_entities([annot[-1] for annot in gold.orig_annot]))
        for id_, word, tag, head, dep, ner in gold.orig_annot:
            # Tags are scored for every gold token, punctuation included.
            gold_tags.add((id_, tag))
            if dep.lower() not in punct_deps:
                gold_deps.add((id_, head, dep.lower()))

        cand_deps = set()
        cand_tags = set()
        for token in tokens:
            # Whitespace-only tokens are not scored at all.
            if token.orth_.isspace():
                continue
            gold_i = gold.cand_to_gold[token.i]
            if gold_i is None:
                self.tags.fp += 1
            else:
                cand_tags.add((gold_i, token.tag_))
            # The strip() test additionally excludes tokens with empty text,
            # which isspace() above does not catch.
            if token.dep_.lower() not in punct_deps and token.orth_.strip():
                gold_head = gold.cand_to_gold[token.head.i]
                # Unaligned arcs cannot go into the set: several of them
                # would collapse into one (None, None) entry, so count each
                # one directly as a false positive instead.
                if gold_i is None or gold_head is None:
                    self.unlabelled.fp += 1
                    self.labelled.fp += 1
                else:
                    cand_deps.add((gold_i, gold_head, token.dep_.lower()))

        # A '-' NER value marks a missing annotation; entities are only
        # scored when no gold token carries one.
        if not any(annot[-1] == '-' for annot in gold.orig_annot):
            cand_ents = set()
            for ent in tokens.ents:
                first = gold.cand_to_gold[ent.start]
                last = gold.cand_to_gold[ent.end-1]
                if first is None or last is None:
                    self.ner.fp += 1
                else:
                    cand_ents.add((ent.label_, first, last))
            self.ner.score_set(cand_ents, gold_ents)

        self.tags.score_set(cand_tags, gold_tags)
        self.labelled.score_set(cand_deps, gold_deps)
        # Dropping the label leaves (word, head) pairs for the unlabelled score.
        self.unlabelled.score_set(
            set(item[:2] for item in cand_deps),
            set(item[:2] for item in gold_deps),
        )

        if verbose:
            gold_words = [item[1] for item in gold.orig_annot]
            # Single-argument print(...) emits the same text under both the
            # Python 2 print statement and the Python 3 print function.
            for w_id, h_id, dep in (cand_deps - gold_deps):
                print('F %s %s %s' % (gold_words[w_id], dep, gold_words[h_id]))
            for w_id, h_id, dep in (gold_deps - cand_deps):
                print('M %s %s %s' % (gold_words[w_id], dep, gold_words[h_id]))
* Add scorer script 2015-03-11 04:07:03 +03:00			`from __future__ import division`

* Avoid shipping the spacy.munge package 2015-06-08 01:54:13 +03:00			`from .gold import tags_to_entities`
* Fix evaluation of NER in scorer.py 2015-05-27 04:18:16 +03:00
* Print parse if verbose in scorer 2015-04-05 23:29:30 +03:00
* Update spacy.scorer, to use P/R/F to support tokenization errors 2015-05-24 21:07:18 +03:00			`class PRFScore(object):`
			`"""A precision / recall / F score"""`
			`def __init__(self):`
			`self.tp = 0`
			`self.fp = 0`
			`self.fn = 0`

			`def score_set(self, cand, gold):`
			`self.tp += len(cand.intersection(gold))`
			`self.fp += len(cand - gold)`
			`self.fn += len(gold - cand)`

			`@property`
			`def precision(self):`
			`return self.tp / (self.tp + self.fp + 1e-100)`

			`@property`
			`def recall(self):`
			`return self.tp / (self.tp + self.fn + 1e-100)`

			`@property`
			`def fscore(self):`
			`p = self.precision`
			`r = self.recall`
			`return 2 * ((p * r) / (p + r + 1e-100))`


* Add scorer script 2015-03-11 04:07:03 +03:00			`class Scorer(object):`
			`def __init__(self, eval_punct=False):`
* Update spacy.scorer, to use P/R/F to support tokenization errors 2015-05-24 21:07:18 +03:00			`self.tokens = PRFScore()`
			`self.sbd = PRFScore()`
			`self.unlabelled = PRFScore()`
			`self.labelled = PRFScore()`
			`self.tags = PRFScore()`
			`self.ner = PRFScore()`
* Add scorer script 2015-03-11 04:07:03 +03:00			`self.eval_punct = eval_punct`

			`@property`
			`def tags_acc(self):`
* Update spacy.scorer, to use P/R/F to support tokenization errors 2015-05-24 21:07:18 +03:00			`return self.tags.fscore * 100`
* Tmp commit. Working on whole document parsing 2015-05-24 03:49:56 +03:00
			`@property`
			`def token_acc(self):`
* Update spacy.scorer, to use P/R/F to support tokenization errors 2015-05-24 21:07:18 +03:00			`return self.tokens.fscore * 100`
* Add scorer script 2015-03-11 04:07:03 +03:00
			`@property`
			`def uas(self):`
* Update spacy.scorer, to use P/R/F to support tokenization errors 2015-05-24 21:07:18 +03:00			`return self.unlabelled.fscore * 100`
* Add scorer script 2015-03-11 04:07:03 +03:00
			`@property`
			`def las(self):`
* Update spacy.scorer, to use P/R/F to support tokenization errors 2015-05-24 21:07:18 +03:00			`return self.labelled.fscore * 100`
* Add scorer script 2015-03-11 04:07:03 +03:00
			`@property`
			`def ents_p(self):`
* Fix evaluation of NER in scorer.py 2015-05-27 04:18:16 +03:00			`return self.ner.precision * 100`
* Add scorer script 2015-03-11 04:07:03 +03:00
			`@property`
			`def ents_r(self):`
* Fix evaluation of NER in scorer.py 2015-05-27 04:18:16 +03:00			`return self.ner.recall * 100`
Remove trailing whitespace 2015-04-19 11:31:31 +03:00
* Add scorer script 2015-03-11 04:07:03 +03:00			`@property`
			`def ents_f(self):`
* Fix evaluation of NER in scorer.py 2015-05-27 04:18:16 +03:00			`return self.ner.fscore * 100`
* Add scorer script 2015-03-11 04:07:03 +03:00
			`def score(self, tokens, gold, verbose=False):`
			`assert len(tokens) == len(gold)`

* Update spacy.scorer, to use P/R/F to support tokenization errors 2015-05-24 21:07:18 +03:00			`gold_deps = set()`
			`gold_tags = set()`
* Fix evaluation of NER in scorer.py 2015-05-27 04:18:16 +03:00			`gold_ents = set(tags_to_entities([annot[-1] for annot in gold.orig_annot]))`
* Update spacy.scorer, to use P/R/F to support tokenization errors 2015-05-24 21:07:18 +03:00			`for id_, word, tag, head, dep, ner in gold.orig_annot:`
* Fix POS tag evaluation in scorer.py: do evaluate punctuation tags 2015-05-30 19:24:32 +03:00			`gold_tags.add((id_, tag))`
* Update spacy.scorer, to use P/R/F to support tokenization errors 2015-05-24 21:07:18 +03:00			`if dep.lower() not in ('p', 'punct'):`
* Fix evaluation of NER in scorer.py 2015-05-27 04:18:16 +03:00			`gold_deps.add((id_, head, dep.lower()))`
* Update spacy.scorer, to use P/R/F to support tokenization errors 2015-05-24 21:07:18 +03:00			`cand_deps = set()`
			`cand_tags = set()`
			`for token in tokens:`
* Don't score whitespace tokens 2015-06-07 20:10:32 +03:00			`if token.orth_.isspace():`
			`continue`
* Fix POS tag evaluation in scorer.py: do evaluate punctuation tags 2015-05-30 19:24:32 +03:00			`gold_i = gold.cand_to_gold[token.i]`
			`if gold_i is None:`
			`self.tags.fp += 1`
			`else:`
			`cand_tags.add((gold_i, token.tag_))`
* Fix punctuation eval in scorer.py 2015-06-28 02:31:39 +03:00			`if token.dep_.lower() not in ('p', 'punct') and token.orth_.strip():`
* Update spacy.scorer, to use P/R/F to support tokenization errors 2015-05-24 21:07:18 +03:00			`gold_head = gold.cand_to_gold[token.head.i]`
			`# None is indistinct, so we can't just add it to the set`
			`# Multiple (None, None) deps are possible`
			`if gold_i is None or gold_head is None:`
			`self.unlabelled.fp += 1`
			`self.labelled.fp += 1`
			`else:`
* Fix evaluation of NER in scorer.py 2015-05-27 04:18:16 +03:00			`cand_deps.add((gold_i, gold_head, token.dep_.lower()))`
* Avoid NER scoring for sentences with some missing NER values. 2015-05-28 23:39:08 +03:00			`if '-' not in [token[-1] for token in gold.orig_annot]:`
			`cand_ents = set()`
			`for ent in tokens.ents:`
			`first = gold.cand_to_gold[ent.start]`
			`last = gold.cand_to_gold[ent.end-1]`
			`if first is None or last is None:`
			`self.ner.fp += 1`
			`else:`
			`cand_ents.add((ent.label_, first, last))`
			`self.ner.score_set(cand_ents, gold_ents)`
* Fix evaluation of NER in scorer.py 2015-05-27 04:18:16 +03:00			`self.tags.score_set(cand_tags, gold_tags)`
* Update spacy.scorer, to use P/R/F to support tokenization errors 2015-05-24 21:07:18 +03:00			`self.labelled.score_set(cand_deps, gold_deps)`
			`self.unlabelled.score_set(`
			`set(item[:2] for item in cand_deps),`
			`set(item[:2] for item in gold_deps),`
			`)`
* Add verbose printing to scorer 2015-06-14 18:45:50 +03:00			`if verbose:`
			`gold_words = [item[1] for item in gold.orig_annot]`
			`for w_id, h_id, dep in (cand_deps - gold_deps):`
			`print 'F', gold_words[w_id], dep, gold_words[h_id]`
			`for w_id, h_id, dep in (gold_deps - cand_deps):`
			`print 'M', gold_words[w_id], dep, gold_words[h_id]`