spaCy (https://github.com/explosion/spaCy) commit 392c4880d9:

* Switch to train_dataset() function in train CLI
* Fixes for pipe() methods in pipeline components
  * Don't clobber `examples` variable with `as_example` in pipe() methods
  * Remove unnecessary traversals of `examples`
* Update Parser.pipe() for Examples
  * Add `as_examples` kwarg to `pipe()` with implementation to return `Example`s
  * Accept `Doc` or `Example` in `pipe()` with `_get_doc()` (copied from `Pipe`)
* Fixes to Example implementation in spacy.gold
  * Move `make_projective` from an attribute of `Example` to an argument of `Example.get_gold_parses()`
  * Heads of 0 are not treated as unset
  * Unset heads are set to self rather than `None` (which causes problems while projectivizing)
  * Check for `Doc` (not just not `None`) when creating GoldParses for a pre-merged example
  * Don't clobber `examples` variable in `iter_gold_docs()`
* Add/modify gold tests for handling projectivity
  * In the JSON roundtrip, compare results from `dev_dataset` rather than `train_dataset` to avoid projectivization (and other potential modifications)
  * Add test for projective train vs. nonprojective dev versions of the same `Doc`
* Handle `ignore_misaligned` as an argument rather than an attribute: move it from an attribute of `Example` to an argument to `Example.get_gold_parses()`, which makes it parallel to `make_projective`. Add a test with old and new alignment that checks whether `ignore_misaligned` errors are raised as expected (only for the new alignment).
* Remove unused attrs from gold.pxd: remove `ignore_misaligned` and `make_projective` from `gold.pxd`
* Restructure Example with merged sents as default: an `Example` now includes a single `TokenAnnotation` that contains all the information from one `Doc` (= one JSON `paragraph`). If required, the individual sentences can be returned as a list of examples with `Example.split_sents()`, with no raw text available.
  * Input/output a single `Example.token_annotation`
  * Add `sent_starts` to `TokenAnnotation` to handle sentence boundaries
  * Replace `Example.merge_sents()` with `Example.split_sents()`
  * Modify components (pipeline components and the conllu2json converter) to use a single `Example.token_annotation`
  * Rework/rename `add_token_annotation()` and `add_doc_annotation()` to `set_token_annotation()` and `set_doc_annotation()`, functions that set rather than append/extend
  * Rename `morphology` to `morphs` in `TokenAnnotation` and `GoldParse`
  * Add getters to `TokenAnnotation` to supply default values when a given attribute is not available
  * `Example.get_gold_parses()` in `spacy.gold._make_golds()` is only applied to single examples, so the `GoldParse` is saved in the provided `Example` rather than creating a new `Example` with no other internal annotation
  * Update tests for API changes and `merge_sents()` vs. `split_sents()`
* Refer to `Example.goldparse` in `iter_gold_docs()`: use `Example.goldparse` instead of `Example.gold`, because a `None` `GoldParse` is generated with `ignore_misaligned`, and generating it on the fly can raise an unwanted AlignmentError
* Fix `make_orth_variants()`: fix a bug related to the conversion from multiple `TokenAnnotation`s to one per `Example`
* Add basic test for `make_orth_variants()`
* Replace try/except with conditionals
* Replace default morph value with set
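As context for the test file below, here is a minimal sketch of the reworked `Example` API that this commit message describes; `doc` and `entities` stand in for real objects, and exact signatures may differ slightly at this commit:

    from spacy.gold import Example

    ex = Example(doc=doc)  # wrap an existing Doc
    # set (rather than append) the gold annotation for the whole paragraph
    ex.set_token_annotation(entities=entities)
    # a single TokenAnnotation per Example; split per sentence if needed
    sentence_examples = ex.split_sents()
    # make_projective and ignore_misaligned are now arguments, not attributes
    gold_parses = ex.get_gold_parses(make_projective=True, ignore_misaligned=True)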
# coding: utf-8
from __future__ import unicode_literals

from numpy.testing import assert_almost_equal, assert_array_almost_equal
import pytest
from pytest import approx

from spacy.gold import Example, GoldParse
from spacy.scorer import Scorer, ROCAUCScore
from spacy.scorer import _roc_auc_score, _roc_curve

from .util import get_doc

test_las_apple = [
    [
        "Apple is looking at buying U.K. startup for $ 1 billion",
        {
            "heads": [2, 2, 2, 2, 3, 6, 4, 4, 10, 10, 7],
            "deps": [
                "nsubj",
                "aux",
                "ROOT",
                "prep",
                "pcomp",
                "compound",
                "dobj",
                "prep",
                "quantmod",
                "compound",
                "pobj",
            ],
        },
    ]
]

test_ner_cardinal = [
    ["100 - 200", {"entities": [[0, 3, "CARDINAL"], [6, 9, "CARDINAL"]]}]
]

test_ner_apple = [
    [
        "Apple is looking at buying U.K. startup for $1 billion",
        {"entities": [(0, 5, "ORG"), (27, 31, "GPE"), (44, 54, "MONEY")]},
    ]
]


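# Scorer.score() is exercised two ways below: the parser test passes a
# (Doc, GoldParse) tuple, while the NER test wraps the Doc in an Example
# and sets the gold annotation on it via set_token_annotation().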
def test_las_per_type(en_vocab):
    # Gold and Doc are identical
    scorer = Scorer()
    for input_, annot in test_las_apple:
        doc = get_doc(
            en_vocab,
            words=input_.split(" "),
            # get_doc() takes heads as offsets relative to each token
            heads=[h - i for i, h in enumerate(annot["heads"])],
            deps=annot["deps"],
        )
        gold = GoldParse(doc, heads=annot["heads"], deps=annot["deps"])
        scorer.score((doc, gold))
    results = scorer.scores

    assert results["uas"] == 100
    assert results["las"] == 100
    assert results["las_per_type"]["nsubj"]["p"] == 100
    assert results["las_per_type"]["nsubj"]["r"] == 100
    assert results["las_per_type"]["nsubj"]["f"] == 100
    assert results["las_per_type"]["compound"]["p"] == 100
    assert results["las_per_type"]["compound"]["r"] == 100
    assert results["las_per_type"]["compound"]["f"] == 100

    # One dep is incorrect in Doc: token 0 is relabeled from "nsubj" to
    # "compound", so 10 of the 11 arcs keep the correct label
    scorer = Scorer()
    for input_, annot in test_las_apple:
        doc = get_doc(
            en_vocab,
            words=input_.split(" "),
            heads=[h - i for i, h in enumerate(annot["heads"])],
            deps=annot["deps"],
        )
        gold = GoldParse(doc, heads=annot["heads"], deps=annot["deps"])
        doc[0].dep_ = "compound"
        scorer.score((doc, gold))
    results = scorer.scores

    # heads are untouched, so UAS stays perfect while LAS drops to 10/11
    assert results["uas"] == 100
    assert_almost_equal(results["las"], 90.9090909)
    assert results["las_per_type"]["nsubj"]["p"] == 0
    assert results["las_per_type"]["nsubj"]["r"] == 0
    assert results["las_per_type"]["nsubj"]["f"] == 0
    # 3 predicted "compound" arcs, 2 of them correct: p=2/3, r=2/2, f=0.8
    assert_almost_equal(results["las_per_type"]["compound"]["p"], 66.6666666)
    assert results["las_per_type"]["compound"]["r"] == 100
    assert results["las_per_type"]["compound"]["f"] == 80


def test_ner_per_type(en_vocab):
    # Gold and Doc are identical
    scorer = Scorer()
    for input_, annot in test_ner_cardinal:
        doc = get_doc(
            en_vocab,
            words=input_.split(" "),
            # get_doc() takes token-index spans, while the gold "entities"
            # annotation above uses character offsets
            ents=[[0, 1, "CARDINAL"], [2, 3, "CARDINAL"]],
        )
        ex = Example(doc=doc)
        ex.set_token_annotation(entities=annot["entities"])
        scorer.score(ex)
    results = scorer.scores

    assert results["ents_p"] == 100
    assert results["ents_f"] == 100
    assert results["ents_r"] == 100
    assert results["ents_per_type"]["CARDINAL"]["p"] == 100
    assert results["ents_per_type"]["CARDINAL"]["f"] == 100
    assert results["ents_per_type"]["CARDINAL"]["r"] == 100

    # Doc has one missing and one extra entity
    # Entity type MONEY is not present in Doc
    scorer = Scorer()
    for input_, annot in test_ner_apple:
        doc = get_doc(
            en_vocab,
            words=input_.split(" "),
            ents=[[0, 1, "ORG"], [5, 6, "GPE"], [6, 7, "ORG"]],
        )
        ex = Example(doc=doc)
        ex.set_token_annotation(entities=annot["entities"])
        scorer.score(ex)
    results = scorer.scores

    # 2 of 3 predicted entities are correct, and 2 of 3 gold entities found
    assert results["ents_p"] == approx(66.66666)
    assert results["ents_r"] == approx(66.66666)
    assert results["ents_f"] == approx(66.66666)
    assert "GPE" in results["ents_per_type"]
    assert "MONEY" in results["ents_per_type"]
    assert "ORG" in results["ents_per_type"]
    assert results["ents_per_type"]["GPE"]["p"] == 100
    assert results["ents_per_type"]["GPE"]["r"] == 100
    assert results["ents_per_type"]["GPE"]["f"] == 100
    # MONEY is in the gold annotation but missing from the Doc
    assert results["ents_per_type"]["MONEY"]["p"] == 0
    assert results["ents_per_type"]["MONEY"]["r"] == 0
    assert results["ents_per_type"]["MONEY"]["f"] == 0
    # one of the two predicted ORG spans is spurious
    assert results["ents_per_type"]["ORG"]["p"] == 50
    assert results["ents_per_type"]["ORG"]["r"] == 100
    assert results["ents_per_type"]["ORG"]["f"] == approx(66.66666)


def test_roc_auc_score():
    # Binary classification, toy tests from the scikit-learn test suite.
    # Note: the unpacked names below appear swapped relative to scikit-learn's
    # roc_curve(), which returns (fpr, tpr, thresholds); the expected arrays
    # are swapped consistently, so the assertions still hold.
    y_true = [0, 1]
    y_score = [0, 1]
    tpr, fpr, _ = _roc_curve(y_true, y_score)
    roc_auc = _roc_auc_score(y_true, y_score)
    assert_array_almost_equal(tpr, [0, 0, 1])
    assert_array_almost_equal(fpr, [0, 1, 1])
    assert_almost_equal(roc_auc, 1.0)

    y_true = [0, 1]
    y_score = [1, 0]
    tpr, fpr, _ = _roc_curve(y_true, y_score)
    roc_auc = _roc_auc_score(y_true, y_score)
    assert_array_almost_equal(tpr, [0, 1, 1])
    assert_array_almost_equal(fpr, [0, 0, 1])
    assert_almost_equal(roc_auc, 0.0)

    y_true = [1, 0]
    y_score = [1, 1]
    tpr, fpr, _ = _roc_curve(y_true, y_score)
    roc_auc = _roc_auc_score(y_true, y_score)
    assert_array_almost_equal(tpr, [0, 1])
    assert_array_almost_equal(fpr, [0, 1])
    assert_almost_equal(roc_auc, 0.5)

    y_true = [1, 0]
    y_score = [1, 0]
    tpr, fpr, _ = _roc_curve(y_true, y_score)
    roc_auc = _roc_auc_score(y_true, y_score)
    assert_array_almost_equal(tpr, [0, 0, 1])
    assert_array_almost_equal(fpr, [0, 1, 1])
    assert_almost_equal(roc_auc, 1.0)

    y_true = [1, 0]
    y_score = [0.5, 0.5]
    tpr, fpr, _ = _roc_curve(y_true, y_score)
    roc_auc = _roc_auc_score(y_true, y_score)
    assert_array_almost_equal(tpr, [0, 1])
    assert_array_almost_equal(fpr, [0, 1])
    assert_almost_equal(roc_auc, 0.5)

    # same result as the previous case with the ROCAUCScore wrapper
    score = ROCAUCScore()
    score.score_set(0.5, 1)
    score.score_set(0.5, 0)
    assert_almost_equal(score.score, 0.5)

    # ROC AUC is undefined when only one class is present: _roc_auc_score
    # raises a ValueError, while the wrapper reports -inf instead
    y_true = [0, 0]
    y_score = [0.25, 0.75]
    with pytest.raises(ValueError):
        _roc_auc_score(y_true, y_score)

    score = ROCAUCScore()
    score.score_set(0.25, 0)
    score.score_set(0.75, 0)
    assert score.score == -float("inf")

    y_true = [1, 1]
    y_score = [0.25, 0.75]
    with pytest.raises(ValueError):
        _roc_auc_score(y_true, y_score)

    score = ROCAUCScore()
    score.score_set(0.25, 1)
    score.score_set(0.75, 1)
    assert score.score == -float("inf")