Improve matcher example (resolves #3287)

2025-11-08 11:57:39 +03:00 · 2019-02-18 13:26:37 +01:00 · 2019-02-18 13:26:37 +01:00 · 38e4422c0d
commit 38e4422c0d
parent 660cfe44c5
1 changed files with 10 additions and 9 deletions
--- a/website/docs/usage/rule-based-matching.md
+++ b/website/docs/usage/rule-based-matching.md
@@ -306,28 +306,29 @@ match on the uppercase versions, in case someone has written it as "Google i/o".
 ### {executable="true"}
 import spacy
 from spacy.matcher import Matcher
+from spacy.tokens import Span

 nlp = spacy.load("en_core_web_sm")
 matcher = Matcher(nlp.vocab)

-# Get the ID of the 'EVENT' entity type. This is required to set an entity.
-EVENT = nlp.vocab.strings["EVENT"]
-
 def add_event_ent(matcher, doc, i, matches):
    # Get the current match and create an entity from its start and end.
    # Append the entity to the doc's entities. (Don't overwrite doc.ents!)
    match_id, start, end = matches[i]
-    entity = (EVENT, start, end)
+    entity = Span(doc, start, end, label="EVENT")
    doc.ents += (entity,)
-    print(doc[start:end].text, entity)
+    print(entity.text)

-matcher.add("GoogleIO", add_event_ent,
-            [{"ORTH": "Google"}, {"ORTH": "I"}, {"ORTH": "/"}, {"ORTH": "O"}],
-            [{"ORTH": "Google"}, {"ORTH": "I"}, {"ORTH": "/"}, {"ORTH": "O"}, {"IS_DIGIT": True}],)
-doc = nlp(u"This is a text about Google I/O 2015.")
+pattern = [{"ORTH": "Google"}, {"ORTH": "I"}, {"ORTH": "/"}, {"ORTH": "O"}]
+matcher.add("GoogleIO", add_event_ent, pattern)
+doc = nlp(u"This is a text about Google I/O.")
 matches = matcher(doc)
 ```

+Very similar logic is already implemented in the built-in
+[`EntityRuler`](/api/entityruler). It also handles overlapping matches, which
+you would otherwise have to take care of yourself.
+
 > #### Tip: Visualizing matches
 >
 > When working with entities, you can use [displaCy](/api/top-level#displacy) to