"""Test interaction between preset entities and sentence boundaries in NER."""
# Regression test for issue #3345
# (spacy/tests/regression/test_issue3345.py).
import pytest
import spacy
from spacy.tokens import Doc
from spacy.pipeline import EntityRuler, EntityRecognizer


# Expected to fail for now; `pytest` must be imported for this marker to
# resolve, otherwise the module errors out at collection time.
@pytest.mark.xfail
def test_issue3345():
    """Test case where preset entity crosses sentence boundary.

    The doc is "I live in New York" with a sentence start forced on "York",
    so the "New York" entity set by the EntityRuler spans the boundary. The
    test then checks that the NER transition system still allows a B-GPE
    action when it reaches "New".
    """
    nlp = spacy.blank("en")
    doc = Doc(nlp.vocab, words=["I", "live", "in", "New", "York"])
    # Put a sentence boundary in the middle of "New York" (token 4 is "York").
    doc[4].is_sent_start = True

    ruler = EntityRuler(nlp, patterns=[{"label": "GPE", "pattern": "New York"}])
    ner = EntityRecognizer(doc.vocab)
    # Add the OUT action. I wouldn't have thought this would be necessary...
    ner.moves.add_action(5, "")
    ner.add_label("GPE")

    # Let the ruler preset the "New York" entity on the doc.
    doc = ruler(doc)
    # Get into the state just before "New": apply one "O" transition for each
    # of the three preceding tokens ("I", "live", "in").
    state = ner.moves.init_batch([doc])[0]
    for _ in range(3):
        ner.moves.apply_transition(state, "O")
    # Check that B-GPE is valid.
    assert ner.moves.is_valid(state, "B-GPE")