spaCy/spacy/tests/test_scorer.py

# coding: utf-8
from __future__ import unicode_literals

from pytest import approx
from spacy.gold import GoldParse
from spacy.scorer import Scorer
from .util import get_doc

test_ner_cardinal = [
    [
        "100 - 200",
        {
            "entities": [
                [0, 3, "CARDINAL"],
                [6, 9, "CARDINAL"]
            ]
        }
    ]
]

test_ner_apple = [
    [
        "Apple is looking at buying U.K. startup for $1 billion",
        {
            "entities": [
                (0, 5, "ORG"),
                (27, 31, "GPE"),
                (44, 54, "MONEY"),
            ]
        }
    ]
]

def test_ner_per_type(en_vocab):
    # Gold and Doc are identical
    scorer = Scorer()
    for input_, annot in test_ner_cardinal:
        doc = get_doc(en_vocab, words = input_.split(' '), ents = [[0, 1, 'CARDINAL'], [2, 3, 'CARDINAL']])
        gold = GoldParse(doc, entities = annot['entities'])
        scorer.score(doc, gold)
    results = scorer.scores

    assert results['ents_p'] == 100
    assert results['ents_f'] == 100
    assert results['ents_r'] == 100
    assert results['ents_per_type']['CARDINAL']['p'] == 100
    assert results['ents_per_type']['CARDINAL']['f'] == 100
    assert results['ents_per_type']['CARDINAL']['r'] == 100

    # Doc has one missing and one extra entity
    # Entity type MONEY is not present in Doc
    scorer = Scorer()
    for input_, annot in test_ner_apple:
        doc = get_doc(en_vocab, words = input_.split(' '), ents = [[0, 1, 'ORG'], [5, 6, 'GPE'], [6, 7, 'ORG']])
        gold = GoldParse(doc, entities = annot['entities'])
        scorer.score(doc, gold)
    results = scorer.scores

    assert results['ents_p'] == approx(66.66666)
    assert results['ents_r'] == approx(66.66666)
    assert results['ents_f'] == approx(66.66666)
    assert 'GPE' in results['ents_per_type']
    assert 'MONEY' in results['ents_per_type']
    assert 'ORG' in results['ents_per_type']
    assert results['ents_per_type']['GPE']['p'] == 100
    assert results['ents_per_type']['GPE']['r'] == 100
    assert results['ents_per_type']['GPE']['f'] == 100
    assert results['ents_per_type']['MONEY']['p'] == 0
    assert results['ents_per_type']['MONEY']['r'] == 0
    assert results['ents_per_type']['MONEY']['f'] == 0
    assert results['ents_per_type']['ORG']['p'] == 50
    assert results['ents_per_type']['ORG']['r'] == 100
    assert results['ents_per_type']['ORG']['f'] == approx(66.66666)