mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 02:06:31 +03:00
Use string values for attrs in Matcher docs
This commit is contained in:
parent
b3c7ee0148
commit
d5a6a9a6a9
|
@ -20,9 +20,8 @@ p Create the rule-based #[code Matcher].
|
||||||
|
|
||||||
+aside-code("Example").
|
+aside-code("Example").
|
||||||
from spacy.matcher import Matcher
|
from spacy.matcher import Matcher
|
||||||
from spacy.attrs import LOWER
|
|
||||||
|
|
||||||
patterns = {"HelloWorld": [{LOWER: "hello"}, {LOWER: "world"}]}
|
patterns = {'HelloWorld': [{'LOWER': 'hello'}, {'LOWER': 'world'}]}
|
||||||
matcher = Matcher(nlp.vocab)
|
matcher = Matcher(nlp.vocab)
|
||||||
|
|
||||||
+table(["Name", "Type", "Description"])
|
+table(["Name", "Type", "Description"])
|
||||||
|
@ -50,10 +49,9 @@ p Find all token sequences matching the supplied patterns on the #[code Doc].
|
||||||
|
|
||||||
+aside-code("Example").
|
+aside-code("Example").
|
||||||
from spacy.matcher import Matcher
|
from spacy.matcher import Matcher
|
||||||
from spacy.attrs import LOWER
|
|
||||||
|
|
||||||
matcher = Matcher(nlp.vocab)
|
matcher = Matcher(nlp.vocab)
|
||||||
pattern = [{LOWER: "hello"}, {LOWER: "world"}]
|
pattern = [{'LOWER': "hello"}, {'LOWER': "world"}]
|
||||||
matcher.add("HelloWorld", None, pattern)
|
matcher.add("HelloWorld", None, pattern)
|
||||||
doc = nlp(u'hello world!')
|
doc = nlp(u'hello world!')
|
||||||
matches = matcher(doc)
|
matches = matcher(doc)
|
||||||
|
@ -129,7 +127,7 @@ p
|
||||||
+aside-code("Example").
|
+aside-code("Example").
|
||||||
matcher = Matcher(nlp.vocab)
|
matcher = Matcher(nlp.vocab)
|
||||||
assert len(matcher) == 0
|
assert len(matcher) == 0
|
||||||
matcher.add('Rule', None, [{ORTH: 'test'}])
|
matcher.add('Rule', None, [{'ORTH': 'test'}])
|
||||||
assert len(matcher) == 1
|
assert len(matcher) == 1
|
||||||
|
|
||||||
+table(["Name", "Type", "Description"])
|
+table(["Name", "Type", "Description"])
|
||||||
|
@ -146,7 +144,7 @@ p Check whether the matcher contains rules for a match ID.
|
||||||
+aside-code("Example").
|
+aside-code("Example").
|
||||||
matcher = Matcher(nlp.vocab)
|
matcher = Matcher(nlp.vocab)
|
||||||
assert 'Rule' not in matcher
|
assert 'Rule' not in matcher
|
||||||
matcher.add('Rule', None, [{ORTH: 'test'}])
|
matcher.add('Rule', None, [{'ORTH': 'test'}])
|
||||||
assert 'Rule' in matcher
|
assert 'Rule' in matcher
|
||||||
|
|
||||||
+table(["Name", "Type", "Description"])
|
+table(["Name", "Type", "Description"])
|
||||||
|
@ -175,8 +173,8 @@ p
|
||||||
print('Matched!', matches)
|
print('Matched!', matches)
|
||||||
|
|
||||||
matcher = Matcher(nlp.vocab)
|
matcher = Matcher(nlp.vocab)
|
||||||
matcher.add('HelloWorld', on_match, [{LOWER: "hello"}, {LOWER: "world"}])
|
matcher.add('HelloWorld', on_match, [{'LOWER': 'hello'}, {'LOWER': 'world'}])
|
||||||
matcher.add('GoogleMaps', on_match, [{ORTH: "Google"}, {ORTH: "Maps"}])
|
matcher.add('GoogleMaps', on_match, [{'ORTH': 'Google'}, {'ORTH': 'Maps'}])
|
||||||
doc = nlp(u'HELLO WORLD on Google Maps.')
|
doc = nlp(u'HELLO WORLD on Google Maps.')
|
||||||
matches = matcher(doc)
|
matches = matcher(doc)
|
||||||
|
|
||||||
|
@ -208,7 +206,7 @@ p
|
||||||
| ID does not exist.
|
| ID does not exist.
|
||||||
|
|
||||||
+aside-code("Example").
|
+aside-code("Example").
|
||||||
matcher.add('Rule', None, [{ORTH: 'test'}])
|
matcher.add('Rule', None, [{'ORTH': 'test'}])
|
||||||
assert 'Rule' in matcher
|
assert 'Rule' in matcher
|
||||||
matcher.remove('Rule')
|
matcher.remove('Rule')
|
||||||
assert 'Rule' not in matcher
|
assert 'Rule' not in matcher
|
||||||
|
@ -228,7 +226,7 @@ p
|
||||||
| patterns.
|
| patterns.
|
||||||
|
|
||||||
+aside-code("Example").
|
+aside-code("Example").
|
||||||
pattern = [{ORTH: 'test'}]
|
pattern = [{'ORTH': 'test'}]
|
||||||
matcher.add('Rule', None, pattern)
|
matcher.add('Rule', None, pattern)
|
||||||
(on_match, patterns) = matcher.get('Rule')
|
(on_match, patterns) = matcher.get('Rule')
|
||||||
assert patterns == [pattern]
|
assert patterns == [pattern]
|
||||||
|
|
|
@ -30,7 +30,7 @@ p
|
||||||
| or "WORLD".
|
| or "WORLD".
|
||||||
|
|
||||||
+code.
|
+code.
|
||||||
[{LOWER: 'hello'}, {IS_PUNCT: True}, {LOWER: 'world'}]
|
[{'LOWER': 'hello'}, {'IS_PUNCT': True}, {'LOWER': 'world'}]
|
||||||
|
|
||||||
p
|
p
|
||||||
| First, we initialise the #[code Matcher] with a vocab. The matcher must
|
| First, we initialise the #[code Matcher] with a vocab. The matcher must
|
||||||
|
@ -43,13 +43,12 @@ p
|
||||||
+code.
|
+code.
|
||||||
import spacy
|
import spacy
|
||||||
from spacy.matcher import Matcher
|
from spacy.matcher import Matcher
|
||||||
from spacy.attrs import LOWER, IS_PUNCT # don't forget to import the attrs!
|
|
||||||
|
|
||||||
nlp = spacy.load('en')
|
nlp = spacy.load('en')
|
||||||
matcher = Matcher(nlp.vocab)
|
matcher = Matcher(nlp.vocab)
|
||||||
# add match ID "HelloWorld" with no callback and one pattern
|
# add match ID "HelloWorld" with no callback and one pattern
|
||||||
matcher.add('HelloWorld', None,
|
matcher.add('HelloWorld', None,
|
||||||
[{LOWER: 'hello'}, {IS_PUNCT: True}, {LOWER: 'world'}])
|
[{'LOWER': 'hello'}, {'IS_PUNCT': True}, {'LOWER': 'world'}])
|
||||||
|
|
||||||
doc = nlp(u'Hello, world! Hello world!')
|
doc = nlp(u'Hello, world! Hello world!')
|
||||||
matches = matcher(doc)
|
matches = matcher(doc)
|
||||||
|
@ -63,8 +62,8 @@ p
|
||||||
|
|
||||||
+code.
|
+code.
|
||||||
matcher.add('HelloWorld', None,
|
matcher.add('HelloWorld', None,
|
||||||
[{LOWER: 'hello'}, {IS_PUNCT: True}, {LOWER: 'world'}],
|
[{'LOWER': 'hello'}, {'IS_PUNCT': True}, {'LOWER': 'world'}],
|
||||||
[{LOWER: 'hello'}, {LOWER: 'world'}])
|
[{'LOWER': 'hello'}, {'LOWER': 'world'}])
|
||||||
|
|
||||||
p
|
p
|
||||||
| By default, the matcher will only return the matches and
|
| By default, the matcher will only return the matches and
|
||||||
|
@ -92,14 +91,13 @@ p
|
||||||
+code.
|
+code.
|
||||||
import spacy
|
import spacy
|
||||||
from spacy.matcher import Matcher
|
from spacy.matcher import Matcher
|
||||||
from spacy.attrs import ORTH, UPPER, LOWER, IS_DIGIT
|
|
||||||
|
|
||||||
nlp = spacy.load('en')
|
nlp = spacy.load('en')
|
||||||
matcher = Matcher(nlp.vocab)
|
matcher = Matcher(nlp.vocab)
|
||||||
|
|
||||||
matcher.add('GoogleIO', add_event_ent,
|
matcher.add('GoogleIO', add_event_ent,
|
||||||
[{ORTH: 'Google'}, {UPPER: 'I'}, {ORTH: '/'}, {UPPER: 'O'}],
|
[{'ORTH': 'Google'}, {'UPPER': 'I'}, {'ORTH': '/'}, {'UPPER': 'O'}],
|
||||||
[{ORTH: 'Google'}, {UPPER: 'I'}, {ORTH: '/'}, {UPPER: 'O'}, {IS_DIGIT: True}])
|
[{'ORTH': 'Google'}, {'UPPER': 'I'}, {'ORTH': '/'}, {'UPPER': 'O'}, {'IS_DIGIT': True}])
|
||||||
|
|
||||||
# Get the ID of the 'EVENT' entity type. This is required to set an entity.
|
# Get the ID of the 'EVENT' entity type. This is required to set an entity.
|
||||||
EVENT = nlp.vocab.strings['EVENT']
|
EVENT = nlp.vocab.strings['EVENT']
|
||||||
|
@ -120,8 +118,8 @@ p
|
||||||
|
|
||||||
+code.
|
+code.
|
||||||
matcher.add('BAD_HTML', merge_and_flag,
|
matcher.add('BAD_HTML', merge_and_flag,
|
||||||
[{ORTH: '<'}, {LOWER: 'br'}, {ORTH: '>'}],
|
[{'ORTH': '<'}, {'LOWER': 'br'}, {'ORTH': '>'}],
|
||||||
[{ORTH: '<'}, {LOWER: 'br/'}, {ORTH: '>'}])
|
[{'ORTH': '<'}, {'LOWER': 'br/'}, {'ORTH': '>'}])
|
||||||
|
|
||||||
# Add a new custom flag to the vocab, which is always False by default.
|
# Add a new custom flag to the vocab, which is always False by default.
|
||||||
# BAD_HTML_FLAG will be the flag ID, which we can use to set it to True on the span.
|
# BAD_HTML_FLAG will be the flag ID, which we can use to set it to True on the span.
|
||||||
|
|
Loading…
Reference in New Issue
Block a user