mirror of
https://github.com/explosion/spaCy.git
synced 2025-06-29 09:23:12 +03:00
adding tests for trained models to ensure predict reproducibility
This commit is contained in:
parent
ede979d42f
commit
e94a21638e
|
@ -1,4 +1,7 @@
|
||||||
import pytest
|
import pytest
|
||||||
|
from numpy.testing import assert_equal
|
||||||
|
from spacy.attrs import ENT_IOB
|
||||||
|
|
||||||
from spacy import util
|
from spacy import util
|
||||||
from spacy.lang.en import English
|
from spacy.lang.en import English
|
||||||
from spacy.language import Language
|
from spacy.language import Language
|
||||||
|
@ -332,6 +335,19 @@ def test_overfitting_IO():
|
||||||
assert ents2[0].text == "London"
|
assert ents2[0].text == "London"
|
||||||
assert ents2[0].label_ == "LOC"
|
assert ents2[0].label_ == "LOC"
|
||||||
|
|
||||||
|
# Make sure that running pipe twice, or comparing to call, always amounts to the same predictions
|
||||||
|
texts = [
|
||||||
|
"Just a sentence.",
|
||||||
|
"Then one more sentence about London.",
|
||||||
|
"Here is another one.",
|
||||||
|
"I like London.",
|
||||||
|
]
|
||||||
|
batch_deps_1 = [doc.to_array([ENT_IOB]) for doc in nlp.pipe(texts)]
|
||||||
|
batch_deps_2 = [doc.to_array([ENT_IOB]) for doc in nlp.pipe(texts)]
|
||||||
|
no_batch_deps = [doc.to_array([ENT_IOB]) for doc in [nlp(text) for text in texts]]
|
||||||
|
assert_equal(batch_deps_1, batch_deps_2)
|
||||||
|
assert_equal(batch_deps_1, no_batch_deps)
|
||||||
|
|
||||||
|
|
||||||
def test_ner_warns_no_lookups(caplog):
|
def test_ner_warns_no_lookups(caplog):
|
||||||
nlp = English()
|
nlp = English()
|
||||||
|
|
|
@ -1,4 +1,7 @@
|
||||||
import pytest
|
import pytest
|
||||||
|
from numpy.testing import assert_equal
|
||||||
|
from spacy.attrs import DEP
|
||||||
|
|
||||||
from spacy.lang.en import English
|
from spacy.lang.en import English
|
||||||
from spacy.training import Example
|
from spacy.training import Example
|
||||||
from spacy.tokens import Doc
|
from spacy.tokens import Doc
|
||||||
|
@ -210,3 +213,16 @@ def test_overfitting_IO():
|
||||||
assert doc2[0].dep_ == "nsubj"
|
assert doc2[0].dep_ == "nsubj"
|
||||||
assert doc2[2].dep_ == "dobj"
|
assert doc2[2].dep_ == "dobj"
|
||||||
assert doc2[3].dep_ == "punct"
|
assert doc2[3].dep_ == "punct"
|
||||||
|
|
||||||
|
# Make sure that running pipe twice, or comparing to call, always amounts to the same predictions
|
||||||
|
texts = [
|
||||||
|
"Just a sentence.",
|
||||||
|
"Then one more sentence about London.",
|
||||||
|
"Here is another one.",
|
||||||
|
"I like London.",
|
||||||
|
]
|
||||||
|
batch_deps_1 = [doc.to_array([DEP]) for doc in nlp.pipe(texts)]
|
||||||
|
batch_deps_2 = [doc.to_array([DEP]) for doc in nlp.pipe(texts)]
|
||||||
|
no_batch_deps = [doc.to_array([DEP]) for doc in [nlp(text) for text in texts]]
|
||||||
|
assert_equal(batch_deps_1, batch_deps_2)
|
||||||
|
assert_equal(batch_deps_1, no_batch_deps)
|
||||||
|
|
|
@ -1,5 +1,7 @@
|
||||||
from typing import Callable, Iterable
|
from typing import Callable, Iterable
|
||||||
import pytest
|
import pytest
|
||||||
|
from numpy.testing import assert_equal
|
||||||
|
from spacy.attrs import ENT_KB_ID
|
||||||
|
|
||||||
from spacy.kb import KnowledgeBase, get_candidates, Candidate
|
from spacy.kb import KnowledgeBase, get_candidates, Candidate
|
||||||
from spacy.vocab import Vocab
|
from spacy.vocab import Vocab
|
||||||
|
@ -496,6 +498,19 @@ def test_overfitting_IO():
|
||||||
predictions.append(ent.kb_id_)
|
predictions.append(ent.kb_id_)
|
||||||
assert predictions == GOLD_entities
|
assert predictions == GOLD_entities
|
||||||
|
|
||||||
|
# Make sure that running pipe twice, or comparing to call, always amounts to the same predictions
|
||||||
|
texts = [
|
||||||
|
"Russ Cochran captured his first major title with his son as caddie.",
|
||||||
|
"Russ Cochran his reprints include EC Comics.",
|
||||||
|
"Russ Cochran has been publishing comic art.",
|
||||||
|
"Russ Cochran was a member of University of Kentucky's golf team.",
|
||||||
|
]
|
||||||
|
batch_deps_1 = [doc.to_array([ENT_KB_ID]) for doc in nlp.pipe(texts)]
|
||||||
|
batch_deps_2 = [doc.to_array([ENT_KB_ID]) for doc in nlp.pipe(texts)]
|
||||||
|
no_batch_deps = [doc.to_array([ENT_KB_ID]) for doc in [nlp(text) for text in texts]]
|
||||||
|
assert_equal(batch_deps_1, batch_deps_2)
|
||||||
|
assert_equal(batch_deps_1, no_batch_deps)
|
||||||
|
|
||||||
|
|
||||||
def test_kb_serialization():
|
def test_kb_serialization():
|
||||||
# Test that the KB can be used in a pipeline with a different vocab
|
# Test that the KB can be used in a pipeline with a different vocab
|
||||||
|
|
|
@ -46,7 +46,6 @@ def test_components_batching_list(name):
|
||||||
@pytest.mark.parametrize("name", ["textcat"])
|
@pytest.mark.parametrize("name", ["textcat"])
|
||||||
def test_components_batching_array(name):
|
def test_components_batching_array(name):
|
||||||
nlp = English()
|
nlp = English()
|
||||||
in_data = [nlp(text) for text in texts]
|
|
||||||
proc = nlp.create_pipe(name)
|
proc = nlp.create_pipe(name)
|
||||||
util_batch_unbatch_docs_array(proc.model, get_docs(), array)
|
util_batch_unbatch_docs_array(proc.model, get_docs(), array)
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
import pytest
|
import pytest
|
||||||
|
from numpy.testing import assert_equal
|
||||||
|
|
||||||
from spacy import util
|
from spacy import util
|
||||||
from spacy.training import Example
|
from spacy.training import Example
|
||||||
|
@ -6,6 +7,7 @@ from spacy.lang.en import English
|
||||||
from spacy.language import Language
|
from spacy.language import Language
|
||||||
from spacy.tests.util import make_tempdir
|
from spacy.tests.util import make_tempdir
|
||||||
from spacy.morphology import Morphology
|
from spacy.morphology import Morphology
|
||||||
|
from spacy.attrs import MORPH
|
||||||
|
|
||||||
|
|
||||||
def test_label_types():
|
def test_label_types():
|
||||||
|
@ -101,3 +103,16 @@ def test_overfitting_IO():
|
||||||
doc2 = nlp2(test_text)
|
doc2 = nlp2(test_text)
|
||||||
assert [str(t.morph) for t in doc2] == gold_morphs
|
assert [str(t.morph) for t in doc2] == gold_morphs
|
||||||
assert [t.pos_ for t in doc2] == gold_pos_tags
|
assert [t.pos_ for t in doc2] == gold_pos_tags
|
||||||
|
|
||||||
|
# Make sure that running pipe twice, or comparing to call, always amounts to the same predictions
|
||||||
|
texts = [
|
||||||
|
"Just a sentence.",
|
||||||
|
"Then one more sentence about London.",
|
||||||
|
"Here is another one.",
|
||||||
|
"I like London.",
|
||||||
|
]
|
||||||
|
batch_deps_1 = [doc.to_array([MORPH]) for doc in nlp.pipe(texts)]
|
||||||
|
batch_deps_2 = [doc.to_array([MORPH]) for doc in nlp.pipe(texts)]
|
||||||
|
no_batch_deps = [doc.to_array([MORPH]) for doc in [nlp(text) for text in texts]]
|
||||||
|
assert_equal(batch_deps_1, batch_deps_2)
|
||||||
|
assert_equal(batch_deps_1, no_batch_deps)
|
||||||
|
|
|
@ -1,4 +1,6 @@
|
||||||
import pytest
|
import pytest
|
||||||
|
from numpy.testing import assert_equal
|
||||||
|
from spacy.attrs import SENT_START
|
||||||
|
|
||||||
from spacy import util
|
from spacy import util
|
||||||
from spacy.training import Example
|
from spacy.training import Example
|
||||||
|
@ -80,3 +82,18 @@ def test_overfitting_IO():
|
||||||
nlp2 = util.load_model_from_path(tmp_dir)
|
nlp2 = util.load_model_from_path(tmp_dir)
|
||||||
doc2 = nlp2(test_text)
|
doc2 = nlp2(test_text)
|
||||||
assert [int(t.is_sent_start) for t in doc2] == gold_sent_starts
|
assert [int(t.is_sent_start) for t in doc2] == gold_sent_starts
|
||||||
|
|
||||||
|
# Make sure that running pipe twice, or comparing to call, always amounts to the same predictions
|
||||||
|
texts = [
|
||||||
|
"Just a sentence.",
|
||||||
|
"Then one more sentence about London.",
|
||||||
|
"Here is another one.",
|
||||||
|
"I like London.",
|
||||||
|
]
|
||||||
|
batch_deps_1 = [doc.to_array([SENT_START]) for doc in nlp.pipe(texts)]
|
||||||
|
batch_deps_2 = [doc.to_array([SENT_START]) for doc in nlp.pipe(texts)]
|
||||||
|
no_batch_deps = [
|
||||||
|
doc.to_array([SENT_START]) for doc in [nlp(text) for text in texts]
|
||||||
|
]
|
||||||
|
assert_equal(batch_deps_1, batch_deps_2)
|
||||||
|
assert_equal(batch_deps_1, no_batch_deps)
|
||||||
|
|
|
@ -1,4 +1,7 @@
|
||||||
import pytest
|
import pytest
|
||||||
|
from numpy.testing import assert_equal
|
||||||
|
from spacy.attrs import TAG
|
||||||
|
|
||||||
from spacy import util
|
from spacy import util
|
||||||
from spacy.training import Example
|
from spacy.training import Example
|
||||||
from spacy.lang.en import English
|
from spacy.lang.en import English
|
||||||
|
@ -117,6 +120,19 @@ def test_overfitting_IO():
|
||||||
assert doc2[2].tag_ is "J"
|
assert doc2[2].tag_ is "J"
|
||||||
assert doc2[3].tag_ is "N"
|
assert doc2[3].tag_ is "N"
|
||||||
|
|
||||||
|
# Make sure that running pipe twice, or comparing to call, always amounts to the same predictions
|
||||||
|
texts = [
|
||||||
|
"Just a sentence.",
|
||||||
|
"I like green eggs.",
|
||||||
|
"Here is another one.",
|
||||||
|
"I eat ham.",
|
||||||
|
]
|
||||||
|
batch_deps_1 = [doc.to_array([TAG]) for doc in nlp.pipe(texts)]
|
||||||
|
batch_deps_2 = [doc.to_array([TAG]) for doc in nlp.pipe(texts)]
|
||||||
|
no_batch_deps = [doc.to_array([TAG]) for doc in [nlp(text) for text in texts]]
|
||||||
|
assert_equal(batch_deps_1, batch_deps_2)
|
||||||
|
assert_equal(batch_deps_1, no_batch_deps)
|
||||||
|
|
||||||
|
|
||||||
def test_tagger_requires_labels():
|
def test_tagger_requires_labels():
|
||||||
nlp = English()
|
nlp = English()
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
import pytest
|
import pytest
|
||||||
import random
|
import random
|
||||||
import numpy.random
|
import numpy.random
|
||||||
|
from numpy.testing import assert_equal
|
||||||
from thinc.api import fix_random_seed
|
from thinc.api import fix_random_seed
|
||||||
from spacy import util
|
from spacy import util
|
||||||
from spacy.lang.en import English
|
from spacy.lang.en import English
|
||||||
|
@ -174,6 +175,14 @@ def test_overfitting_IO():
|
||||||
assert scores["cats_score"] == 1.0
|
assert scores["cats_score"] == 1.0
|
||||||
assert "cats_score_desc" in scores
|
assert "cats_score_desc" in scores
|
||||||
|
|
||||||
|
# Make sure that running pipe twice, or comparing to call, always amounts to the same predictions
|
||||||
|
texts = ["Just a sentence.", "I like green eggs.", "I am happy.", "I eat ham."]
|
||||||
|
batch_deps_1 = [doc.cats for doc in nlp.pipe(texts)]
|
||||||
|
batch_deps_2 = [doc.cats for doc in nlp.pipe(texts)]
|
||||||
|
no_batch_deps = [doc.cats for doc in [nlp(text) for text in texts]]
|
||||||
|
assert_equal(batch_deps_1, batch_deps_2)
|
||||||
|
assert_equal(batch_deps_1, no_batch_deps)
|
||||||
|
|
||||||
|
|
||||||
# fmt: off
|
# fmt: off
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
|
|
Loading…
Reference in New Issue
Block a user