diff --git a/spacy/gold/example.pyx b/spacy/gold/example.pyx index 24778ff77..a7a26b6b3 100644 --- a/spacy/gold/example.pyx +++ b/spacy/gold/example.pyx @@ -23,7 +23,6 @@ cpdef Doc annotations2doc(vocab, tok_annot, doc_annot): cdef class Example: def __init__(self, Doc predicted, Doc reference, *, Alignment alignment=None): """ Doc can either be text, or an actual Doc """ - assert predicted.vocab is reference.vocab msg = "Example.__init__ got None for '{arg}'. Requires Doc." if predicted is None: raise TypeError(msg.format(arg="predicted")) @@ -89,11 +88,12 @@ cdef class Example: output.append(None) elif gold_i is None: if i in i2j_multi: - output.append(vocab.strings[gold_values[i2j_multi[i]]]) + output.append(gold_values[i2j_multi[i]]) else: output.append(None) else: - output.append(vocab.strings[gold_values[gold_i]]) + output.append([gold_values[gold_i]]) + output = [vocab.strings[o] for o in output] return output def to_dict(self): diff --git a/spacy/tests/test_new_example.py b/spacy/tests/test_new_example.py index 517329dba..b7af77149 100644 --- a/spacy/tests/test_new_example.py +++ b/spacy/tests/test_new_example.py @@ -32,11 +32,11 @@ def test_Example_from_dict_invalid(annots): Example.from_dict(predicted, annots) -@pytest.mark.parametrize("gold_words", [["ice", "cream"], ["icecream"], ["i", "ce", "cream"]]) +@pytest.mark.parametrize("pred_words", [["ice", "cream"], ["icecream"], ["i", "ce", "cream"]]) @pytest.mark.parametrize("annots", [{"words": ["icecream"], "tags": ["NN"]}]) -def test_Example_from_dict_with_tags(gold_words, annots): +def test_Example_from_dict_with_tags(pred_words, annots): vocab = Vocab() - predicted = Doc(vocab, words=gold_words) + predicted = Doc(vocab, words=pred_words) example = Example.from_dict(predicted, annots) for i, token in enumerate(example.reference): assert token.tag_ == annots["tags"][i] @@ -44,6 +44,30 @@ def test_Example_from_dict_with_tags(gold_words, annots): assert aligned_tags == ["NN" for _ in predicted] +def test_aligned_tags(): + pred_words = ["Apply", "some", "sunscreen", "unless", "you", "can", "not"] + gold_words = ["Apply", "some", "sun", "screen", "unless", "you", "cannot"] + gold_tags = ["VERB", "DET", "NOUN", "NOUN", "SCONJ", "PRON", "VERB"] + annots = {"words": gold_words, "tags": gold_tags} + vocab = Vocab() + predicted = Doc(vocab, words=pred_words) + example = Example.from_dict(predicted, annots) + aligned_tags = example.get_aligned("tag") + assert aligned_tags == ["VERB", "DET", None, "SCONJ", "PRON", "VERB", "VERB"] + + +def test_aligned_tags_multi(): + pred_words = ["Applysome", "sunscreen", "unless", "you", "can", "not"] + gold_words = ["Apply", "somesun", "screen", "unless", "you", "cannot"] + gold_tags = ["VERB", "DET", "NOUN", "SCONJ", "PRON", "VERB"] + annots = {"words": gold_words, "tags": gold_tags} + vocab = Vocab() + predicted = Doc(vocab, words=pred_words) + example = Example.from_dict(predicted, annots) + aligned_tags = example.get_aligned("tag") + assert aligned_tags == [None, None, "SCONJ", "PRON", "VERB", "VERB"] + + @pytest.mark.parametrize( "annots", [