mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-24 00:04:15 +03:00
a5b1f6dcec
💫 Fix NER when preset entities cross sentence boundaries
31 lines
989 B
Python
31 lines
989 B
Python
# coding: utf8
|
|
from __future__ import unicode_literals
|
|
|
|
import pytest
|
|
from spacy.lang.en import English
|
|
from spacy.tokens import Doc
|
|
from spacy.pipeline import EntityRuler, EntityRecognizer
|
|
|
|
|
|
@pytest.mark.xfail
def test_issue3345():
    """Regression test for issue 3345: a preset entity straddling a sentence boundary.

    The entity ruler marks "New York" as a GPE, while "York" is also flagged
    as the start of a new sentence. After stepping the transition system past
    the three tokens that precede "New", opening a GPE entity (``B-GPE``)
    must still be a valid move. The test is marked as an expected failure.
    """
    words = ["I", "live", "in", "New", "York"]
    gpe_patterns = [{"label": "GPE", "pattern": "New York"}]

    english = English()
    doc = Doc(english.vocab, words=words)
    # Put a sentence break between "New" and "York" so that the preset
    # entity spans two sentences.
    doc[4].is_sent_start = True

    entity_ruler = EntityRuler(english, patterns=gpe_patterns)
    recognizer = EntityRecognizer(doc.vocab)
    # The OUT action has to be registered by hand before the label is added
    # (the original author noted this was unexpectedly necessary).
    recognizer.moves.add_action(5, "")
    recognizer.add_label("GPE")

    # Let the ruler preset the "New York" entity on the doc.
    doc = entity_ruler(doc)

    # Advance to the state just before "New": one "O" transition for each of
    # "I", "live" and "in".
    parse_state = recognizer.moves.init_batch([doc])[0]
    for _ in range(3):
        recognizer.moves.apply_transition(parse_state, "O")

    # Beginning a GPE entity at "New" must be allowed.
    assert recognizer.moves.is_valid(parse_state, "B-GPE")
|