diff --git a/spacy/matcher.pyx b/spacy/matcher.pyx index eeffc1551..87aaa3c50 100644 --- a/spacy/matcher.pyx +++ b/spacy/matcher.pyx @@ -199,9 +199,18 @@ cdef class Matcher: return (self.__class__, (self.vocab, self._patterns), None, None) def __len__(self): + """Get the number of rules added to the matcher. + + RETURNS (int): The number of rules. + """ return len(self._patterns) def __contains__(self, key): + """Check whether the matcher contains rules for a match ID. + + key (unicode): The match ID. + RETURNS (bool): Whether the matcher contains rules for this match ID. + """ - return len(self._patterns) + return self._normalize_key(key) in self._patterns def add(self, key, on_match, *patterns): @@ -209,9 +218,9 @@ cdef class Matcher: A match-rule consists of: an ID key, an on_match callback, and one or more patterns. If the key exists, the patterns are appended to the previous ones, and the previous on_match callback is replaced. The - `on_match` callback will receive the arguments `(matcher, doc, i, matches)`. - Note that if no `on_match` callback is specified, the document will not - be modified. A pattern consists of one or more `token_specs`, where a + `on_match` callback will receive the arguments `(matcher, doc, i, + matches)`. You can also set `on_match` to `None` to not perform any + actions. A pattern consists of one or more `token_specs`, where a `token_spec` is a dictionary mapping attribute IDs to values. Token descriptors can also include quantifiers. There are currently important known problems with the quantifiers – see the docs. @@ -231,8 +240,10 @@ cdef class Matcher: self._patterns[key].append(specs) def remove(self, key): - """Remove a rule from the matcher. - A KeyError is raised if the key does not exist. + """Remove a rule from the matcher. A KeyError is raised if the key does + not exist. + + key (unicode): The ID of the match rule. 
""" key = self._normalize_key(key) self._patterns.pop(key) diff --git a/website/docs/api/matcher.jade b/website/docs/api/matcher.jade index b092587bc..6dae73c51 100644 --- a/website/docs/api/matcher.jade +++ b/website/docs/api/matcher.jade @@ -54,7 +54,7 @@ p Find all token sequences matching the supplied patterns on the #[code Doc]. matcher = Matcher(nlp.vocab) pattern = [{LOWER: "hello"}, {LOWER: "world"}] - matcher.add_pattern("HelloWorld", pattern, on_match=None) + matcher.add("HelloWorld", None, pattern) doc = nlp(u'hello world!') matches = matcher(doc) @@ -88,6 +88,12 @@ p Find all token sequences matching the supplied patterns on the #[code Doc]. p Match a stream of documents, yielding them in turn. ++aside-code("Example"). + from spacy.matcher import Matcher + matcher = Matcher(nlp.vocab) + for doc in matcher.pipe(docs, batch_size=50, n_threads=4): + pass + +table(["Name", "Type", "Description"]) +row +cell #[code docs] @@ -112,14 +118,60 @@ p Match a stream of documents, yielding them in turn. +cell #[code Doc] +cell Documents, in order. -+h(2, "add_pattern") Matcher.add ++h(2, "len") Matcher.__len__ + +tag method + +p Get the number of rules added to the matcher. + ++aside-code("Example"). + from spacy.matcher import Matcher + from spacy.attrs import ORTH + + matcher = Matcher(nlp.vocab) + assert len(matcher) == 0 + matcher.add('rule', None, [{ORTH: 'rule'}]) + assert len(matcher) == 1 + ++table(["Name", "Type", "Description"]) + +footrow + +cell returns + +cell int + +cell The number of rules. + ++h(2, "contains") Matcher.__contains__ + +tag method + +p Check whether the matcher contains rules for a match ID. + ++aside-code("Example"). 
+ from spacy.matcher import Matcher + from spacy.attrs import ORTH + + matcher = Matcher(nlp.vocab) + assert 'rule' not in matcher + matcher.add('rule', None, [{ORTH: 'rule'}]) + assert 'rule' in matcher + +table(["Name", "Type", "Description"]) + +row + +cell #[code key] + +cell unicode + +cell The match ID. + +footrow + +cell returns + +cell bool + +cell Whether the matcher contains rules for this match ID. + +h(2, "add") Matcher.add +tag method p | Add a rule to the matcher, consisting of an ID key, one or more patterns, and - | a callback function to act on the matches. - | The callback function will receive the arguments - | #[code matcher], #[code doc], #[code i] and #[code matches]. + | a callback function to act on the matches. The callback function will + | receive the arguments #[code matcher], #[code doc], #[code i] and + | #[code matches]. If a pattern already exists for the given ID, the + | patterns will be extended. An #[code on_match] callback will be + | overwritten. +aside-code("Example"). from spacy.matcher import Matcher @@ -131,7 +183,6 @@ p matcher = Matcher(nlp.vocab) matcher.add('HelloWorld', on_match, [{LOWER: "hello"}, {LOWER: "world"}]) matcher.add('GoogleMaps', on_match, [{ORTH: "Google"}, {ORTH: "Maps"}]) - doc = nlp(u'HELLO WORLD on Google Maps.') matches = matcher(doc) @@ -143,7 +194,7 @@ p +row +cell #[code on_match] - +cell function + +cell function or #[code None] +cell | Callback function to act on matches. Takes the arguments | #[code matcher], #[code doc], #[code i] and #[code matches]. @@ -154,3 +205,26 @@ p +cell | Match pattern. A pattern consists of a list of dicts, where each | dict describes a token. + ++h(2, "remove") Matcher.remove + +tag method + +p + | Remove a rule from the matcher. A #[code KeyError] is raised if the match + | ID does not exist. + ++aside-code("Example"). 
+ from spacy.matcher import Matcher + from spacy.attrs import ORTH + + matcher = Matcher(nlp.vocab) + matcher.add('rule', None, [{ORTH: 'rule'}]) + assert 'rule' in matcher + matcher.remove('rule') + assert 'rule' not in matcher + +table(["Name", "Type", "Description"]) + +row + +cell #[code key] + +cell unicode + +cell The ID of the match rule.