# Provenance: commit afea6505f3b0ebaf82d6e4dd25e6cb63ed2b3087
# Author: Matthew Honnibal, Thu, 27 Oct 2016 18:01:34 +0200
# Subject: Test Issue 429: No valid actions for NER after matcher adds a
#          new entity label.
# Target path: spacy/tests/regression/test_issue429.py
from __future__ import unicode_literals
import pytest

import spacy
from spacy.attrs import ORTH


@pytest.mark.models
def test_issue429():
    """Regression test for issue 429.

    Registers a matcher pattern carrying a new entity label ('TEST'), runs
    the tokenizer, tagger and matcher by hand, and then runs the entity
    recogniser on the resulting doc. There are no explicit assertions: the
    test passes as long as none of these calls raises.
    """
    nlp = spacy.load('en', parser=False)

    def merge_phrases(matcher, doc, i, matches):
        """Matcher callback: merge each matched span into a single token.

        Does nothing unless it is invoked for the last entry of `matches`,
        so the merging is performed once per matcher run.
        """
        is_final_match = (i == len(matches) - 1)
        if not is_final_match:
            return None
        # Resolve every span up front, before any merge touches the doc.
        resolved = []
        for ent_id, label, start, end in matches:
            resolved.append((ent_id, label, doc[start:end]))
        for _, label, span in resolved:
            if label:
                tag = 'NNP'
            else:
                tag = span.root.tag_
            span.merge(tag, span.text, nlp.vocab.strings[label])

    # The full pipeline is run once before the new pattern is registered.
    # NOTE(review): presumably part of reproducing the issue - confirm
    # before removing this seemingly unused call.
    doc = nlp('a')
    nlp.matcher.add(
        'key',
        label='TEST',
        attrs={},
        specs=[[{ORTH: 'a'}]],
        on_match=merge_phrases,
    )

    # Drive the remaining components manually on a fresh doc.
    doc = nlp.tokenizer('a b c')
    nlp.tagger(doc)
    nlp.matcher(doc)

    # Debug output of the entity annotations present after matching.
    for word in doc:
        print(word.text, word.ent_iob_, word.ent_type_)

    # The call named by the issue title ("No valid actions for NER").
    nlp.entity(doc)