mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 10:16:27 +03:00
Update Matcher API docs
This commit is contained in:
parent
e39ad78267
commit
9edc7fb0ba
|
@ -45,7 +45,7 @@ p
|
||||||
|
|
||||||
nlp = spacy.load('en')
|
nlp = spacy.load('en')
|
||||||
matcher = Matcher(nlp.vocab)
|
matcher = Matcher(nlp.vocab)
|
||||||
matcher.add_pattern('HelloWorld', [{LOWER: 'hello'}, {IS_PUNCT: True}, {LOWER: 'world'}])
|
matcher.add('HelloWorld', [{LOWER: 'hello'}, {IS_PUNCT: True}, {LOWER: 'world'}])
|
||||||
|
|
||||||
doc = nlp(u'Hello, world! Hello world!')
|
doc = nlp(u'Hello, world! Hello world!')
|
||||||
matches = matcher(doc)
|
matches = matcher(doc)
|
||||||
|
@ -58,8 +58,8 @@ p
|
||||||
| without punctuation between "hello" and "world":
|
| without punctuation between "hello" and "world":
|
||||||
|
|
||||||
+code.
|
+code.
|
||||||
matcher.add_pattern('HelloWorld', [{LOWER: 'hello'}, {IS_PUNCT: True}, {LOWER: 'world'}],
|
matcher.add('HelloWorld', [{LOWER: 'hello'}, {IS_PUNCT: True}, {LOWER: 'world'}],
|
||||||
[{LOWER: 'hello'}, {LOWER: 'world'}])
|
[{LOWER: 'hello'}, {LOWER: 'world'}])
|
||||||
|
|
||||||
p
|
p
|
||||||
| By default, the matcher will only return the matches and
|
| By default, the matcher will only return the matches and
|
||||||
|
@ -81,7 +81,7 @@ p
|
||||||
| To be safe, you only match on the uppercase versions, in case someone has
|
| To be safe, you only match on the uppercase versions, in case someone has
|
||||||
| written it as "Google i/o". You also add a second pattern with an added
|
| written it as "Google i/o". You also add a second pattern with an added
|
||||||
| #[code {IS_DIGIT: True}] token – this will make sure you also match on
|
| #[code {IS_DIGIT: True}] token – this will make sure you also match on
|
||||||
| "Google I/O 2017". If this pattern matches, spaCy should execute your
|
| "Google I/O 2017". If your pattern matches, spaCy should execute your
|
||||||
| custom callback function #[code add_event_ent].
|
| custom callback function #[code add_event_ent].
|
||||||
|
|
||||||
+code.
|
+code.
|
||||||
|
@ -92,17 +92,16 @@ p
|
||||||
nlp = spacy.load('en')
|
nlp = spacy.load('en')
|
||||||
matcher = Matcher(nlp.vocab)
|
matcher = Matcher(nlp.vocab)
|
||||||
|
|
||||||
matcher.add_pattern('GoogleIO', [{ORTH: 'Google'}, {UPPER: 'I'}, {ORTH: '/'}, {UPPER: 'O'}],
|
matcher.add('GoogleIO', [{ORTH: 'Google'}, {UPPER: 'I'}, {ORTH: '/'}, {UPPER: 'O'}],
|
||||||
[{ORTH: 'Google'}, {UPPER: 'I'}, {ORTH: '/'}, {UPPER: 'O'}, {IS_DIGIT: True}],
|
[{ORTH: 'Google'}, {UPPER: 'I'}, {ORTH: '/'}, {UPPER: 'O'}, {IS_DIGIT: True}],
|
||||||
on_match=add_event_ent)
|
on_match=add_event_ent)
|
||||||
|
|
||||||
# Get the ID of the 'EVENT' entity type. This is required to set an entity.
|
# Get the ID of the 'EVENT' entity type. This is required to set an entity.
|
||||||
EVENT = nlp.vocab.strings['EVENT']
|
EVENT = nlp.vocab.strings['EVENT']
|
||||||
|
|
||||||
def add_event_ent(matcher, doc, i, matches):
|
def add_event_ent(matcher, doc, i, matches):
|
||||||
# Get the current match and create tuple of entity label, start and end.
|
# Get the current match and create tuple of entity label, start and end.
|
||||||
# Append entity to the doc's entity. (Don't overwrite doc.ents, in case
|
# Append entity to the doc's entities. (Don't overwrite doc.ents!)
|
||||||
# it already has other entities!)
|
|
||||||
match_id, start, end = matches[i]
|
match_id, start, end = matches[i]
|
||||||
doc.ents += ((EVENT, start, end),)
|
doc.ents += ((EVENT, start, end),)
|
||||||
|
|
||||||
|
@ -115,12 +114,12 @@ p
|
||||||
| function #[code merge_and_flag]:
|
| function #[code merge_and_flag]:
|
||||||
|
|
||||||
+code.
|
+code.
|
||||||
matcher.add_pattern('BAD_HTML', [{ORTH: '<'}, {LOWER: 'br'}, {ORTH: '>'}],
|
matcher.add('BAD_HTML', [{ORTH: '<'}, {LOWER: 'br'}, {ORTH: '>'}],
|
||||||
[{ORTH: '<'}, {LOWER: 'br/'}, {ORTH: '>'}]
|
[{ORTH: '<'}, {LOWER: 'br/'}, {ORTH: '>'}],
|
||||||
on_match=merge_and_flag)
|
on_match=merge_and_flag)
|
||||||
|
|
||||||
# Add a new custom flag to the vocab, which is always False by default.
|
# Add a new custom flag to the vocab, which is always False by default.
|
||||||
# BAD_HTML will be the flag ID, which we can use to set it to True on the span.
|
# BAD_HTML_FLAG will be the flag ID, which we can use to set it to True on the span.
|
||||||
BAD_HTML_FLAG = doc.vocab.add_flag(lambda text: False)
|
BAD_HTML_FLAG = doc.vocab.add_flag(lambda text: False)
|
||||||
|
|
||||||
def merge_and_flag(matcher, doc, i, matches):
|
def merge_and_flag(matcher, doc, i, matches):
|
||||||
|
|
Loading…
Reference in New Issue
Block a user