diff --git a/spacy/tests/spans/test_merge.py b/spacy/tests/spans/test_merge.py index 86712f771..29cc917fe 100644 --- a/spacy/tests/spans/test_merge.py +++ b/spacy/tests/spans/test_merge.py @@ -19,6 +19,15 @@ def test_spans_merge_tokens(en_tokenizer): assert doc[0].text == 'Los Angeles' assert doc[0].head.text == 'start' + doc = get_doc(tokens.vocab, [t.text for t in tokens], heads=heads) + assert len(doc) == 4 + assert doc[0].head.text == 'Angeles' + assert doc[1].head.text == 'start' + doc.merge(0, len('Los Angeles'), tag='NNP', lemma='Los Angeles', label='GPE') + assert len(doc) == 3 + assert doc[0].text == 'Los Angeles' + assert doc[0].head.text == 'start' + assert doc[0].ent_type_ == 'GPE' def test_spans_merge_heads(en_tokenizer): text = "I found a pilates class near work." @@ -114,4 +123,4 @@ def test_spans_subtree_size_check(en_tokenizer): sent1 = list(doc.sents)[0] init_len = len(list(sent1.root.subtree)) doc[0:2].merge('none', 'none', 'none') - assert len(list(sent1.root.subtree)) == init_len - 1 + assert len(list(sent1.root.subtree)) == init_len - 1 \ No newline at end of file diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx index bda528383..348d84012 100644 --- a/spacy/tokens/doc.pyx +++ b/spacy/tokens/doc.pyx @@ -667,6 +667,13 @@ cdef class Doc: attributes[TAG] = self.vocab.strings[tag] attributes[LEMMA] = self.vocab.strings[lemma] attributes[ENT_TYPE] = self.vocab.strings[ent_type] + elif not args: + if "label" in attributes and ENT_TYPE not in attributes: + if type(attributes["label"]) == int: + attributes[ENT_TYPE] = attributes["label"] + else: + attributes[ENT_TYPE] = self.vocab.strings[attributes["label"]] + elif args: raise ValueError( "Doc.merge received %d non-keyword arguments. "