Fix matcher tests and matcher docs

This commit is contained in:
ines 2017-05-23 11:36:02 +02:00
parent f497cf60b2
commit e6acd3bbf2
3 changed files with 7 additions and 7 deletions

View File

@@ -17,7 +17,7 @@ def test_issue429(EN):
 doc = EN('a')
 matcher = Matcher(EN.vocab)
-matcher.add('TEST', on_match=merge_phrases, [{'ORTH': 'a'}])
+matcher.add('TEST', merge_phrases, [{'ORTH': 'a'}])
 doc = EN.tokenizer('a b c')
 EN.tagger(doc)
 matcher(doc)

View File

@@ -52,7 +52,7 @@ p Find all token sequences matching the supplied patterns on the #[code Doc].
 matcher = Matcher(nlp.vocab)
 pattern = [{'LOWER': "hello"}, {'LOWER': "world"}]
-matcher.add("HelloWorld", on_match=None, pattern)
+matcher.add("HelloWorld", None, pattern)
 doc = nlp(u'hello world!')
 matches = matcher(doc)

View File

@@ -47,8 +47,8 @@ p
 nlp = spacy.load('en')
 matcher = Matcher(nlp.vocab)
 # add match ID "HelloWorld" with no callback and one pattern
-matcher.add('HelloWorld', on_match=None,
-[{'LOWER': 'hello'}, {'IS_PUNCT': True}, {'LOWER': 'world'}])
+pattern = [{'LOWER': 'hello'}, {'IS_PUNCT': True}, {'LOWER': 'world'}]
+matcher.add('HelloWorld', None, pattern)
 doc = nlp(u'Hello, world! Hello world!')
 matches = matcher(doc)
@@ -61,7 +61,7 @@ p
 | without punctuation between "hello" and "world":
 +code.
-matcher.add('HelloWorld', on_match=None,
+matcher.add('HelloWorld', None,
 [{'LOWER': 'hello'}, {'IS_PUNCT': True}, {'LOWER': 'world'}],
 [{'LOWER': 'hello'}, {'LOWER': 'world'}])
@@ -104,7 +104,7 @@ p
 match_id, start, end = matches[i]
 doc.ents += ((EVENT, start, end),)
-matcher.add('GoogleIO', on_match=add_event_ent,
+matcher.add('GoogleIO', add_event_ent,
 [{'ORTH': 'Google'}, {'UPPER': 'I'}, {'ORTH': '/'}, {'UPPER': 'O'}],
 [{'ORTH': 'Google'}, {'UPPER': 'I'}, {'ORTH': '/'}, {'UPPER': 'O'}, {'IS_DIGIT': True}])
@@ -127,7 +127,7 @@ p
 span.merge(is_stop=True) # merge (and mark it as a stop word, just in case)
 span.set_flag(BAD_HTML_FLAG, True) # set BAD_HTML_FLAG
-matcher.add('BAD_HTML', on_match=merge_and_flag,
+matcher.add('BAD_HTML', merge_and_flag,
 [{'ORTH': '<'}, {'LOWER': 'br'}, {'ORTH': '>'}],
 [{'ORTH': '<'}, {'LOWER': 'br/'}, {'ORTH': '>'}])