mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 01:16:28 +03:00
Fix matcher tests and matcher docs
This commit is contained in:
parent
f497cf60b2
commit
e6acd3bbf2
|
@@ -17,7 +17,7 @@ def test_issue429(EN):
|
|||
|
||||
doc = EN('a')
|
||||
matcher = Matcher(EN.vocab)
|
||||
matcher.add('TEST', on_match=merge_phrases, [{'ORTH': 'a'}])
|
||||
matcher.add('TEST', merge_phrases, [{'ORTH': 'a'}])
|
||||
doc = EN.tokenizer('a b c')
|
||||
EN.tagger(doc)
|
||||
matcher(doc)
|
||||
|
|
|
@@ -52,7 +52,7 @@ p Find all token sequences matching the supplied patterns on the #[code Doc].
|
|||
|
||||
matcher = Matcher(nlp.vocab)
|
||||
pattern = [{'LOWER': "hello"}, {'LOWER': "world"}]
|
||||
matcher.add("HelloWorld", on_match=None, pattern)
|
||||
matcher.add("HelloWorld", None, pattern)
|
||||
doc = nlp(u'hello world!')
|
||||
matches = matcher(doc)
|
||||
|
||||
|
|
|
@@ -47,8 +47,8 @@ p
|
|||
nlp = spacy.load('en')
|
||||
matcher = Matcher(nlp.vocab)
|
||||
# add match ID "HelloWorld" with no callback and one pattern
|
||||
matcher.add('HelloWorld', on_match=None,
|
||||
[{'LOWER': 'hello'}, {'IS_PUNCT': True}, {'LOWER': 'world'}])
|
||||
pattern = [{'LOWER': 'hello'}, {'IS_PUNCT': True}, {'LOWER': 'world'}]
|
||||
matcher.add('HelloWorld', None, pattern)
|
||||
|
||||
doc = nlp(u'Hello, world! Hello world!')
|
||||
matches = matcher(doc)
|
||||
|
@@ -61,7 +61,7 @@ p
|
|||
| without punctuation between "hello" and "world":
|
||||
|
||||
+code.
|
||||
matcher.add('HelloWorld', on_match=None,
|
||||
matcher.add('HelloWorld', None,
|
||||
[{'LOWER': 'hello'}, {'IS_PUNCT': True}, {'LOWER': 'world'}],
|
||||
[{'LOWER': 'hello'}, {'LOWER': 'world'}])
|
||||
|
||||
|
@@ -104,7 +104,7 @@ p
|
|||
match_id, start, end = matches[i]
|
||||
doc.ents += ((EVENT, start, end),)
|
||||
|
||||
matcher.add('GoogleIO', on_match=add_event_ent,
|
||||
matcher.add('GoogleIO', add_event_ent,
|
||||
[{'ORTH': 'Google'}, {'UPPER': 'I'}, {'ORTH': '/'}, {'UPPER': 'O'}],
|
||||
[{'ORTH': 'Google'}, {'UPPER': 'I'}, {'ORTH': '/'}, {'UPPER': 'O'}, {'IS_DIGIT': True}])
|
||||
|
||||
|
@@ -127,7 +127,7 @@ p
|
|||
span.merge(is_stop=True) # merge (and mark it as a stop word, just in case)
|
||||
span.set_flag(BAD_HTML_FLAG, True) # set BAD_HTML_FLAG
|
||||
|
||||
matcher.add('BAD_HTML', on_match=merge_and_flag,
|
||||
matcher.add('BAD_HTML', merge_and_flag,
|
||||
[{'ORTH': '<'}, {'LOWER': 'br'}, {'ORTH': '>'}],
|
||||
[{'ORTH': '<'}, {'LOWER': 'br/'}, {'ORTH': '>'}])
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user