Mirror of https://github.com/explosion/spaCy.git, synced 2025-07-19 20:52:23 +03:00
add basic tests for debugging
This commit is contained in:
parent 391b512afd
commit 0f5c586e2f
@@ -59,6 +59,7 @@ def tuplify(layer1: Model, layer2: Model, *layers) -> Model:
    )


# TODO replace this with thinc version once PR is in
def tuplify_forward(model, X, is_train):
    Ys = []
    backprops = []

@@ -77,16 +78,27 @@ def tuplify_forward(model, X, is_train):
    return tuple(Ys), backprop_tuplify


# TODO make more robust, see chain
# TODO replace this with thinc version once PR is in
def tuplify_init(model, X, Y) -> Model:
    if X is None and Y is None:
        for layer in model.layers:
            layer.initialize()

        if model.layers[0].has_dim("nI"):
            model.set_dim("nI", model.layers[0].get_dim("nI"))
        return model

    for layer in model.layers:
        layer.initialize(X=X)
    # Try to set nO on each layer, where available.
    # All layers have the same input, and the output should map directly from the
    # given Y, if provided.
    for ii, layer in enumerate(model.layers):
        if Y is not None and layer.has_dim("nO") is None:
            layer.initialize(X=X, Y=Y[ii])
        else:
            layer.initialize(X=X)

    if model.layers[0].has_dim("nI"):
        model.set_dim("nI", model.layers[0].get_dim("nI"))
    # this model can have an input dimension, but can't have an output dimension
    return model
spacy/tests/pipeline/test_coref.py (new file, 110 lines)
@@ -0,0 +1,110 @@
import pytest
from spacy import util
from spacy.training import Example
from spacy.lang.en import English
from spacy.tests.util import make_tempdir
from spacy.pipeline.coref import DEFAULT_CLUSTERS_PREFIX

# fmt: off
TRAIN_DATA = [
    (
        "Yes, I noticed that many friends around me received it. It seems that almost everyone received this SMS.",
        {
            "spans": {
                f"{DEFAULT_CLUSTERS_PREFIX}_1": [
                    (5, 6, "MENTION"),      # I
                    (40, 42, "MENTION"),    # me
                ],
                f"{DEFAULT_CLUSTERS_PREFIX}_2": [
                    (52, 54, "MENTION"),    # it
                    (95, 103, "MENTION"),   # this SMS
                ]
            }
        },
    ),
]
# fmt: on


@pytest.fixture
def nlp():
    return English()


def test_add_pipe(nlp):
    nlp.add_pipe("coref")
    assert nlp.pipe_names == ["coref"]


def test_not_initialized(nlp):
    nlp.add_pipe("coref")
    text = "She gave me her pen."
    with pytest.raises(ValueError):
        nlp(text)


def test_initialized(nlp):
    nlp.add_pipe("coref")
    nlp.initialize()
    assert nlp.pipe_names == ["coref"]
    text = "She gave me her pen."
    doc = nlp(text)
    # The results of this are weird & non-deterministic
    print(doc.spans)


def test_initialized_2(nlp):
    nlp.add_pipe("coref")
    nlp.initialize()
    assert nlp.pipe_names == ["coref"]
    text = "She gave me her pen."
    doc = nlp(text)
    # TODO: THIS CRASHES
    print(nlp(text).spans)


def test_overfitting_IO(nlp):
    # Simple test to try and quickly overfit the coref component - ensuring the ML models work correctly
    train_examples = []
    for text, annot in TRAIN_DATA:
        train_examples.append(Example.from_dict(nlp.make_doc(text), annot))

    nlp.add_pipe("coref")
    optimizer = nlp.initialize()
    test_text = TRAIN_DATA[0][0]
    doc = nlp(test_text)
    print("BEFORE", doc.spans)

    for i in range(5):
        losses = {}
        nlp.update(train_examples, sgd=optimizer, losses=losses)
        doc = nlp(test_text)
        print(i, doc.spans)
    print(losses["coref"])  # < 0.001

    # test the trained model
    doc = nlp(test_text)
    print("AFTER", doc.spans)

    # Also test the results are still the same after IO
    with make_tempdir() as tmp_dir:
        nlp.to_disk(tmp_dir)
        nlp2 = util.load_model_from_path(tmp_dir)
        doc2 = nlp2(test_text)
        print("doc2", doc2.spans)

    # Make sure that running pipe twice, or comparing to call, always amounts to the same predictions
    texts = [
        test_text,
        "I noticed many friends around me",
        "They received it. They received the SMS.",
    ]
    batch_deps_1 = [doc.spans for doc in nlp.pipe(texts)]
    print(batch_deps_1)
    batch_deps_2 = [doc.spans for doc in nlp.pipe(texts)]
    print(batch_deps_2)
    no_batch_deps = [doc.spans for doc in [nlp(text) for text in texts]]
    print(no_batch_deps)
    # assert_equal(batch_deps_1, batch_deps_2)
    # assert_equal(batch_deps_1, no_batch_deps)
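The span offsets in TRAIN_DATA are character offsets into the raw text, with one key per coreference cluster under the DEFAULT_CLUSTERS_PREFIX naming scheme. A quick sanity check, not part of the commit, that the offsets line up with the annotated mentions and with token boundaries:

    # Illustrative only: verify the character offsets used in TRAIN_DATA.
    from spacy.lang.en import English

    text = TRAIN_DATA[0][0]
    doc = English().make_doc(text)
    assert text[5:6] == "I"
    assert text[40:42] == "me"
    assert text[52:54] == "it"
    assert text[95:103] == "this SMS"
    assert doc.char_span(40, 42).text == "me"   # aligned with token boundaries

Since these debugging tests rely on print() for their output, note that pytest only shows captured stdout when a test fails or when the suite is run with the -s flag.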