Improve matcher example (resolves #3287)

This commit is contained in:
Ines Montani 2019-02-18 13:26:37 +01:00
parent 660cfe44c5
commit 38e4422c0d

View File

@@ -306,28 +306,29 @@ match on the uppercase versions, in case someone has written it as "Google i/o".
### {executable="true"} ### {executable="true"}
import spacy import spacy
from spacy.matcher import Matcher from spacy.matcher import Matcher
from spacy.tokens import Span
nlp = spacy.load("en_core_web_sm") nlp = spacy.load("en_core_web_sm")
matcher = Matcher(nlp.vocab) matcher = Matcher(nlp.vocab)
# Get the ID of the 'EVENT' entity type. This is required to set an entity.
EVENT = nlp.vocab.strings["EVENT"]
def add_event_ent(matcher, doc, i, matches): def add_event_ent(matcher, doc, i, matches):
# Get the current match and create tuple of entity label, start and end. # Get the current match and create tuple of entity label, start and end.
# Append entity to the doc's entity. (Don't overwrite doc.ents!) # Append entity to the doc's entity. (Don't overwrite doc.ents!)
match_id, start, end = matches[i] match_id, start, end = matches[i]
entity = (EVENT, start, end) entity = Span(doc, start, end, label="EVENT")
doc.ents += (entity,) doc.ents += (entity,)
print(doc[start:end].text, entity) print(entity.text)
matcher.add("GoogleIO", add_event_ent, pattern = [{"ORTH": "Google"}, {"ORTH": "I"}, {"ORTH": "/"}, {"ORTH": "O"}]
[{"ORTH": "Google"}, {"ORTH": "I"}, {"ORTH": "/"}, {"ORTH": "O"}], matcher.add("GoogleIO", add_event_ent, pattern)
[{"ORTH": "Google"}, {"ORTH": "I"}, {"ORTH": "/"}, {"ORTH": "O"}, {"IS_DIGIT": True}],) doc = nlp(u"This is a text about Google I/O.")
doc = nlp(u"This is a text about Google I/O 2015.")
matches = matcher(doc) matches = matcher(doc)
``` ```
Very similar logic is implemented in the built-in
[`EntityRuler`](/api/entityruler). It also handles overlapping matches, which
you would otherwise have to take care of yourself.
> #### Tip: Visualizing matches > #### Tip: Visualizing matches
> >
> When working with entities, you can use [displaCy](/api/top-level#displacy) to > When working with entities, you can use [displaCy](/api/top-level#displacy) to