mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 18:06:29 +03:00
Improve matcher example (resolves #3287)
This commit is contained in:
parent
660cfe44c5
commit
38e4422c0d
|
@ -306,28 +306,29 @@ match on the uppercase versions, in case someone has written it as "Google i/o".
|
||||||
### {executable="true"}
|
### {executable="true"}
|
||||||
import spacy
|
import spacy
|
||||||
from spacy.matcher import Matcher
|
from spacy.matcher import Matcher
|
||||||
|
from spacy.tokens import Span
|
||||||
|
|
||||||
nlp = spacy.load("en_core_web_sm")
|
nlp = spacy.load("en_core_web_sm")
|
||||||
matcher = Matcher(nlp.vocab)
|
matcher = Matcher(nlp.vocab)
|
||||||
|
|
||||||
# Get the ID of the 'EVENT' entity type. This is required to set an entity.
|
|
||||||
EVENT = nlp.vocab.strings["EVENT"]
|
|
||||||
|
|
||||||
def add_event_ent(matcher, doc, i, matches):
|
def add_event_ent(matcher, doc, i, matches):
|
||||||
# Get the current match and create tuple of entity label, start and end.
|
# Get the current match and create a Span for it with the entity label.
|
||||||
# Append entity to the doc's entity. (Don't overwrite doc.ents!)
|
# Append entity to the doc's entities. (Don't overwrite doc.ents!)
|
||||||
match_id, start, end = matches[i]
|
match_id, start, end = matches[i]
|
||||||
entity = (EVENT, start, end)
|
entity = Span(doc, start, end, label="EVENT")
|
||||||
doc.ents += (entity,)
|
doc.ents += (entity,)
|
||||||
print(doc[start:end].text, entity)
|
print(entity.text)
|
||||||
|
|
||||||
matcher.add("GoogleIO", add_event_ent,
|
pattern = [{"ORTH": "Google"}, {"ORTH": "I"}, {"ORTH": "/"}, {"ORTH": "O"}]
|
||||||
[{"ORTH": "Google"}, {"ORTH": "I"}, {"ORTH": "/"}, {"ORTH": "O"}],
|
matcher.add("GoogleIO", add_event_ent, pattern)
|
||||||
[{"ORTH": "Google"}, {"ORTH": "I"}, {"ORTH": "/"}, {"ORTH": "O"}, {"IS_DIGIT": True}],)
|
doc = nlp(u"This is a text about Google I/O.")
|
||||||
doc = nlp(u"This is a text about Google I/O 2015.")
|
|
||||||
matches = matcher(doc)
|
matches = matcher(doc)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Very similar logic has been implemented in the built-in
|
||||||
|
[`EntityRuler`](/api/entityruler) by the way. It also takes care of handling
|
||||||
|
overlapping matches, which you would otherwise have to take care of yourself.
|
||||||
|
|
||||||
> #### Tip: Visualizing matches
|
> #### Tip: Visualizing matches
|
||||||
>
|
>
|
||||||
> When working with entities, you can use [displaCy](/api/top-level#displacy) to
|
> When working with entities, you can use [displaCy](/api/top-level#displacy) to
|
||||||
|
|
Loading…
Reference in New Issue
Block a user