diff --git a/spacy/ml/models/coref_util.py b/spacy/ml/models/coref_util.py
index 9281ad0c7..00d501f80 100644
--- a/spacy/ml/models/coref_util.py
+++ b/spacy/ml/models/coref_util.py
@@ -207,11 +207,13 @@ def create_gold_scores(
 
 def spans2ints(doc):
     """Convert doc.spans to nested list of ints for comparison.
-    The ints are token indices.
+    The ints are character indices, and the spans groups are sorted by key first.
 
     This is useful for checking consistency of predictions.
     """
     out = []
-    for key, cluster in doc.spans.items():
-        out.append([(ss.start, ss.end) for ss in cluster])
+    keys = sorted([key for key in doc.spans])
+    for key in keys:
+        cluster = doc.spans[key]
+        out.append([(ss.start_char, ss.end_char) for ss in cluster])
     return out
diff --git a/spacy/tests/pipeline/test_span_predictor.py b/spacy/tests/pipeline/test_span_predictor.py
index 4434b6651..3d88b9548 100644
--- a/spacy/tests/pipeline/test_span_predictor.py
+++ b/spacy/tests/pipeline/test_span_predictor.py
@@ -114,13 +114,15 @@ def test_overfitting_IO(nlp):
     test_text = TRAIN_DATA[0][0]
     doc = nlp(test_text)
 
-    for i in range(1500):
+    for i in range(15):
         losses = {}
         nlp.update(train_examples, sgd=optimizer, losses=losses)
         doc = nlp(test_text)
 
     # test the trained model, using the pred since it has heads
     doc = nlp(train_examples[0].predicted)
+    # XXX This actually tests that it can overfit
+    assert spans2ints(doc) == spans2ints(train_examples[0].reference)
 
     # Also test the results are still the same after IO
     with make_tempdir() as tmp_dir:
@@ -134,6 +136,7 @@ def test_overfitting_IO(nlp):
         "I noticed many friends around me",
         "They received it. They received the SMS.",
     ]
+    # XXX Note these have no predictions because they have no input spans
     docs1 = list(nlp.pipe(texts))
     docs2 = list(nlp.pipe(texts))
     docs3 = [nlp(text) for text in texts]
@@ -175,7 +178,7 @@ def test_tokenization_mismatch(nlp):
     test_text = TRAIN_DATA[0][0]
     doc = nlp(test_text)
 
-    for i in range(100):
+    for i in range(15):
         losses = {}
         nlp.update(train_examples, sgd=optimizer, losses=losses)
         doc = nlp(test_text)
@@ -183,12 +186,8 @@ def test_tokenization_mismatch(nlp):
     # test the trained model; need to use doc with head spans on it already
     test_doc = train_examples[0].predicted
     doc = nlp(test_doc)
-
-    # XXX DEBUG
-    print("SPANS", len(doc.spans))
-    for key, val in doc.spans.items():
-        print(key, val)
-    print("...")
+    # XXX This actually tests that it can overfit
+    assert spans2ints(doc) == spans2ints(train_examples[0].reference)
 
     # Also test the results are still the same after IO
     with make_tempdir() as tmp_dir:
@@ -209,5 +208,4 @@ def test_tokenization_mismatch(nlp):
     docs3 = [nlp(text) for text in texts]
     assert spans2ints(docs1[0]) == spans2ints(docs2[0])
     assert spans2ints(docs1[0]) == spans2ints(docs3[0])
-    assert False
 
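For reference, a minimal sketch of how the updated spans2ints helper is meant to be used outside the test suite, mirroring the consistency checks above. The pipeline name "en_coref_sm" is hypothetical; any loaded pipeline whose components write clusters into doc.spans would do.

    import spacy
    from spacy.ml.models.coref_util import spans2ints

    # Hypothetical pipeline with a component that populates doc.spans
    nlp = spacy.load("en_coref_sm")

    doc1 = nlp("They received it. They received the SMS.")
    doc2 = nlp("They received it. They received the SMS.")

    # spans2ints now returns (start_char, end_char) tuples grouped by span key,
    # with the groups sorted by key, so identical predictions compare equal
    # even if the span groups were created in a different order.
    assert spans2ints(doc1) == spans2ints(doc2)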