Update test

This commit is contained in:
Matthew Honnibal 2020-06-13 23:05:16 +02:00
parent 5564314d32
commit 3eb8f3867e

View File

@ -6,6 +6,7 @@ from spacy.gold.new_example import NewExample as Example
from spacy.lang.en import English
from spacy.syntax.nonproj import is_nonproj_tree
from spacy.syntax.gold_parse import GoldParse, get_parses_from_example
from spacy.syntax.gold_parse import get_parses_from_example
from spacy.tokens import Doc
from spacy.util import get_words_and_spaces, compounding, minibatch
import pytest
@ -279,22 +280,21 @@ def test_roundtrip_docs_to_json(doc):
goldcorpus = GoldCorpus(train=str(json_file), dev=str(json_file))
reloaded_example = next(goldcorpus.dev_dataset(nlp=nlp))
goldparse = reloaded_example._deprecated_get_gold()
assert len(doc) == goldcorpus.count_train()
assert text == reloaded_example.text
assert tags == goldparse.tags
assert pos == goldparse.pos
assert morphs == goldparse.morphs
assert lemmas == goldparse.lemmas
assert deps == goldparse.labels
assert heads == goldparse.heads
assert biluo_tags == goldparse.ner
assert "TRAVEL" in goldparse.cats
assert "BAKING" in goldparse.cats
assert cats["TRAVEL"] == goldparse.cats["TRAVEL"]
assert cats["BAKING"] == goldparse.cats["BAKING"]
assert text == reloaded_example.predicted.text
assert tags == [t.tag_ for t in reloaded_example.reference]
assert pos == [t.pos_ for t in reloaded_example.reference]
assert morphs == [t.morph_ for t in reloaded_example.reference]
assert lemmas == [t.lemma_ for t in reloaded_example.reference]
assert deps == [t.dep_ for t in reloaded_example.reference]
assert heads == [t.head.i for t in reloaded_example.reference]
assert "TRAVEL" in reloaded_example.reference.cats
assert "BAKING" in reloaded_example.reference.cats
assert cats["TRAVEL"] == reloaded_example.reference.cats["TRAVEL"]
assert cats["BAKING"] == reloaded_example.reference.cats["BAKING"]
@pytest.mark.xfail # TODO do we need to do the projectivity differently?
def test_projective_train_vs_nonprojective_dev(doc):
nlp = English()
deps = [t.dep_ for t in doc]
@ -310,7 +310,7 @@ def test_projective_train_vs_nonprojective_dev(doc):
train_goldparse = get_parses_from_example(train_reloaded_example)[0][1]
dev_reloaded_example = next(goldcorpus.dev_dataset(nlp))
dev_goldparse = dev_reloaded_example._deprecated_get_gold()
dev_goldparse = get_parses_from_example(dev_reloaded_example)[0][1]
assert is_nonproj_tree([t.head.i for t in doc]) is True
assert is_nonproj_tree(train_goldparse.heads) is False
@ -365,7 +365,7 @@ def test_make_orth_variants(doc):
# due to randomness, test only that this runs with no errors for now
train_reloaded_example = next(goldcorpus.train_dataset(nlp, orth_variant_level=0.2))
train_goldparse = train_reloaded_example._deprecated_get_gold()
train_goldparse = get_parses_from_example(train_reloaded_example)[0][1]
@pytest.mark.parametrize(
@ -419,20 +419,6 @@ def test_gold_constructor():
assert gold.words == ["This", "is", "a", "sentence"]
def test_gold_orig_annot():
nlp = English()
doc = nlp("This is a sentence")
gold = GoldParse(doc, cats={"cat1": 1.0, "cat2": 0.0})
assert gold.orig.words == ["This", "is", "a", "sentence"]
assert gold.cats["cat1"]
doc_annotation = DocAnnotation(cats={"cat1": 0.0, "cat2": 1.0})
gold2 = GoldParse.from_annotation(doc, doc_annotation, gold.orig)
assert gold2.orig.words == ["This", "is", "a", "sentence"]
assert not gold2.cats["cat1"]
def test_tuple_format_implicit():
"""Test tuple format with implicit GoldParse creation"""