Fix EntityRuler bug: ent_ids returns None for phrase patterns (#8169)

* bugfix for explosion/spaCy#8168

* add test for explosion/spaCy#8168
This commit is contained in:
Dhruv Naik 2021-05-31 14:08:53 +05:30 committed by GitHub
parent b0467d2972
commit 283f64a98d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 12 additions and 1 deletions

View File

@ -303,7 +303,7 @@ class EntityRuler(Pipe):
self.nlp.pipe(phrase_pattern_texts),
phrase_pattern_ids,
):
phrase_pattern = {"label": label, "pattern": pattern, "id": ent_id}
phrase_pattern = {"label": label, "pattern": pattern}
if ent_id:
phrase_pattern["id"] = ent_id
phrase_patterns.append(phrase_pattern)

View File

@ -0,0 +1,11 @@
from spacy.lang.en import English
def test_issue8168():
    """Regression test for explosion/spaCy#8168.

    Registers one string pattern that has no "id" key plus two token
    patterns that share the same "id", then checks that the ruler's
    ``_ent_ids`` mapping contains exactly one entry, for the shared id.
    """
    pipeline = English()
    entity_ruler = pipeline.add_pipe("entity_ruler")

    # String pattern that carries no "id" key.
    org_entry = {"label": "ORG", "pattern": "Apple"}
    # Two token patterns that differ only in their second token and share
    # a single "id".
    gpe_entries = [
        {
            "label": "GPE",
            "pattern": [{"LOWER": "san"}, {"LOWER": second_token}],
            "id": "san-francisco",
        }
        for second_token in ("francisco", "fran")
    ]
    entity_ruler.add_patterns([org_entry, *gpe_entries])

    expected_ent_ids = {8043148519967183733: ("GPE", "san-francisco")}
    assert entity_ruler._ent_ids == expected_ent_ids