mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-14 13:47:13 +03:00
74 lines
2.4 KiB
Python
74 lines
2.4 KiB
Python
|
# coding: utf-8
|
||
|
from __future__ import unicode_literals
|
||
|
|
||
|
from pytest import approx
|
||
|
from spacy.gold import GoldParse
|
||
|
from spacy.scorer import Scorer
|
||
|
from .util import get_doc
|
||
|
|
||
|
test_ner_cardinal = [
|
||
|
[
|
||
|
"100 - 200",
|
||
|
{
|
||
|
"entities": [
|
||
|
[0, 3, "CARDINAL"],
|
||
|
[6, 9, "CARDINAL"]
|
||
|
]
|
||
|
}
|
||
|
]
|
||
|
]
|
||
|
|
||
|
test_ner_apple = [
|
||
|
[
|
||
|
"Apple is looking at buying U.K. startup for $1 billion",
|
||
|
{
|
||
|
"entities": [
|
||
|
(0, 5, "ORG"),
|
||
|
(27, 31, "GPE"),
|
||
|
(44, 54, "MONEY"),
|
||
|
]
|
||
|
}
|
||
|
]
|
||
|
]
|
||
|
|
||
|
def test_ner_per_type(en_vocab):
|
||
|
# Gold and Doc are identical
|
||
|
scorer = Scorer()
|
||
|
for input_, annot in test_ner_cardinal:
|
||
|
doc = get_doc(en_vocab, words = input_.split(' '), ents = [[0, 1, 'CARDINAL'], [2, 3, 'CARDINAL']])
|
||
|
gold = GoldParse(doc, entities = annot['entities'])
|
||
|
scorer.score(doc, gold)
|
||
|
results = scorer.scores
|
||
|
|
||
|
assert results['ents_p'] == 100
|
||
|
assert results['ents_f'] == 100
|
||
|
assert results['ents_r'] == 100
|
||
|
assert results['ents_per_type']['CARDINAL']['p'] == 100
|
||
|
assert results['ents_per_type']['CARDINAL']['f'] == 100
|
||
|
assert results['ents_per_type']['CARDINAL']['r'] == 100
|
||
|
|
||
|
# Doc has one missing and one extra entity
|
||
|
# Entity type MONEY is not present in Doc
|
||
|
scorer = Scorer()
|
||
|
for input_, annot in test_ner_apple:
|
||
|
doc = get_doc(en_vocab, words = input_.split(' '), ents = [[0, 1, 'ORG'], [5, 6, 'GPE'], [6, 7, 'ORG']])
|
||
|
gold = GoldParse(doc, entities = annot['entities'])
|
||
|
scorer.score(doc, gold)
|
||
|
results = scorer.scores
|
||
|
|
||
|
assert results['ents_p'] == approx(66.66666)
|
||
|
assert results['ents_r'] == approx(66.66666)
|
||
|
assert results['ents_f'] == approx(66.66666)
|
||
|
assert 'GPE' in results['ents_per_type']
|
||
|
assert 'MONEY' in results['ents_per_type']
|
||
|
assert 'ORG' in results['ents_per_type']
|
||
|
assert results['ents_per_type']['GPE']['p'] == 100
|
||
|
assert results['ents_per_type']['GPE']['r'] == 100
|
||
|
assert results['ents_per_type']['GPE']['f'] == 100
|
||
|
assert results['ents_per_type']['MONEY']['p'] == 0
|
||
|
assert results['ents_per_type']['MONEY']['r'] == 0
|
||
|
assert results['ents_per_type']['MONEY']['f'] == 0
|
||
|
assert results['ents_per_type']['ORG']['p'] == 50
|
||
|
assert results['ents_per_type']['ORG']['r'] == 100
|
||
|
assert results['ents_per_type']['ORG']['f'] == approx(66.66666)
|