diff --git a/website/docs/api/matcher.jade b/website/docs/api/matcher.jade index 6b1b233e6..5e15f852c 100644 --- a/website/docs/api/matcher.jade +++ b/website/docs/api/matcher.jade @@ -20,9 +20,8 @@ p Create the rule-based #[code Matcher]. +aside-code("Example"). from spacy.matcher import Matcher - from spacy.attrs import LOWER - patterns = {"HelloWorld": [{LOWER: "hello"}, {LOWER: "world"}]} + patterns = {'HelloWorld': [{'LOWER': 'hello'}, {'LOWER': 'world'}]} matcher = Matcher(nlp.vocab) +table(["Name", "Type", "Description"]) @@ -50,10 +49,9 @@ p Find all token sequences matching the supplied patterns on the #[code Doc]. +aside-code("Example"). from spacy.matcher import Matcher - from spacy.attrs import LOWER matcher = Matcher(nlp.vocab) - pattern = [{LOWER: "hello"}, {LOWER: "world"}] + pattern = [{'LOWER': "hello"}, {'LOWER': "world"}] matcher.add("HelloWorld", on_match=None, pattern) doc = nlp(u'hello world!') matches = matcher(doc) @@ -129,7 +127,7 @@ p +aside-code("Example"). matcher = Matcher(nlp.vocab) assert len(matcher) == 0 - matcher.add('Rule', None, [{ORTH: 'test'}]) + matcher.add('Rule', None, [{'ORTH': 'test'}]) assert len(matcher) == 1 +table(["Name", "Type", "Description"]) @@ -146,7 +144,7 @@ p Check whether the matcher contains rules for a match ID. +aside-code("Example"). matcher = Matcher(nlp.vocab) assert 'Rule' in matcher == False - matcher.add('Rule', None, [{ORTH: 'test'}]) + matcher.add('Rule', None, [{'ORTH': 'test'}]) assert 'Rule' in matcher == True +table(["Name", "Type", "Description"]) @@ -175,8 +173,8 @@ p print('Matched!', matches) matcher = Matcher(nlp.vocab) - matcher.add('HelloWorld', on_match, [{LOWER: "hello"}, {LOWER: "world"}]) - matcher.add('GoogleMaps', on_match, [{ORTH: "Google"}, {ORTH: "Maps"}]) + matcher.add('HelloWorld', on_match, [{'LOWER': 'hello'}, {'LOWER': 'world'}]) + matcher.add('GoogleMaps', on_match, [{'ORTH': 'Google'}, {'ORTH': 'Maps'}]) doc = nlp(u'HELLO WORLD on Google Maps.') matches = matcher(doc) @@ -208,7 +206,7 @@ p | ID does not exist. +aside-code("Example"). - matcher.add('Rule', None, [{ORTH: 'test'}]) + matcher.add('Rule', None, [{'ORTH': 'test'}]) assert 'Rule' in matcher == True matcher.remove('Rule') assert 'Rule' in matcher == False @@ -228,7 +226,7 @@ p | patterns. +aside-code("Example"). - pattern = [{ORTH: 'test'}] + pattern = [{'ORTH': 'test'}] matcher.add('Rule', None, pattern) (on_match, patterns) = matcher.get('Rule') assert patterns = [pattern] diff --git a/website/docs/usage/rule-based-matching.jade b/website/docs/usage/rule-based-matching.jade index 2e14e12a9..ef26f69b6 100644 --- a/website/docs/usage/rule-based-matching.jade +++ b/website/docs/usage/rule-based-matching.jade @@ -30,7 +30,7 @@ p | or "WORLD". +code. - [{LOWER: 'hello'}, {IS_PUNCT: True}, {LOWER: 'world'}] + [{'LOWER': 'hello'}, {'IS_PUNCT': True}, {'LOWER': 'world'}] p | First, we initialise the #[code Matcher] with a vocab. The matcher must @@ -43,13 +43,12 @@ p +code. import spacy from spacy.matcher import Matcher - from spacy.attrs import LOWER, IS_PUNCT # don't forget to import the attrs! nlp = spacy.load('en') matcher = Matcher(nlp.vocab) # add match ID "HelloWorld" with no callback and one pattern matcher.add('HelloWorld', on_match=None, - [{LOWER: 'hello'}, {IS_PUNCT: True}, {LOWER: 'world'}]) + [{'LOWER': 'hello'}, {'IS_PUNCT': True}, {'LOWER': 'world'}]) doc = nlp(u'Hello, world! Hello world!') matches = matcher(doc) @@ -63,8 +62,8 @@ p +code. matcher.add('HelloWorld', on_match=None, - [{LOWER: 'hello'}, {IS_PUNCT: True}, {LOWER: 'world'}], - [{LOWER: 'hello'}, {LOWER: 'world'}]) + [{'LOWER': 'hello'}, {'IS_PUNCT': True}, {'LOWER': 'world'}], + [{'LOWER': 'hello'}, {'LOWER': 'world'}]) p | By default, the matcher will only return the matches and @@ -92,14 +91,13 @@ p +code. import spacy from spacy.matcher import Matcher - from spacy.attrs import ORTH, UPPER, LOWER, IS_DIGIT nlp = spacy.load('en') matcher = Matcher(nlp.vocab) matcher.add('GoogleIO', on_match=add_event_ent, - [{ORTH: 'Google'}, {UPPER: 'I'}, {ORTH: '/'}, {UPPER: 'O'}], - [{ORTH: 'Google'}, {UPPER: 'I'}, {ORTH: '/'}, {UPPER: 'O'}, {IS_DIGIT: True}]) + [{'ORTH': 'Google'}, {'UPPER': 'I'}, {'ORTH': '/'}, {'UPPER': 'O'}], + [{'ORTH': 'Google'}, {'UPPER': 'I'}, {'ORTH': '/'}, {'UPPER': 'O'}, {'IS_DIGIT': True}]) # Get the ID of the 'EVENT' entity type. This is required to set an entity. EVENT = nlp.vocab.strings['EVENT'] @@ -120,8 +118,8 @@ p +code. matcher.add('BAD_HTML', on_match=merge_and_flag, - [{ORTH: '<'}, {LOWER: 'br'}, {ORTH: '>'}], - [{ORTH: '<'}, {LOWER: 'br/'}, {ORTH: '>'}]) + [{'ORTH': '<'}, {'LOWER': 'br'}, {'ORTH': '>'}], + [{'ORTH': '<'}, {'LOWER': 'br/'}, {'ORTH': '>'}]) # Add a new custom flag to the vocab, which is always False by default. # BAD_HTML_FLAG will be the flag ID, which we can use to set it to True on the span.