Update docstrings and API docs for Matcher

2026-01-11 03:01:25 +03:00 · 2017-05-20 14:26:10 +02:00 · 2017-05-20 14:26:10 +02:00 · c00ff257be
commit c00ff257be
parent 790435e51c
2 changed files with 97 additions and 12 deletions
--- a/spacy/matcher.pyx
+++ b/spacy/matcher.pyx
@ -199,9 +199,18 @@ cdef class Matcher:
        return (self.__class__, (self.vocab, self._patterns), None, None)

    def __len__(self):
+        """Get the number of rules added to the matcher.
+
+        RETURNS (int): The number of rules.
+        """
        return len(self._patterns)

    def __contains__(self, key):
+        """Check whether the matcher contains rules for a match ID.
+
+        key (unicode): The match ID.
+        RETURNS (bool): Whether the matcher contains rules for this match ID.
+        """
        return self._normalize_key(key) in self._patterns

    def add(self, key, on_match, *patterns):
@ -209,9 +218,9 @@ cdef class Matcher:
        A match-rule consists of: an ID key, an on_match callback, and one or
        more patterns. If the key exists, the patterns are appended to the
        previous ones, and the previous on_match callback is replaced. The
-        `on_match` callback will receive the arguments `(matcher, doc, i, matches)`.
-        Note that if no `on_match` callback is specified, the document will not
-        be modified. A pattern consists of one or more `token_specs`, where a
+        `on_match` callback will receive the arguments `(matcher, doc, i,
+        matches)`. You can also set `on_match` to `None` to not perform any
+        actions. A pattern consists of one or more `token_specs`, where a
        `token_spec` is a dictionary mapping attribute IDs to values. Token
        descriptors can also include quantifiers. There are currently important
        known problems with the quantifiers – see the docs.
@ -231,8 +240,10 @@ cdef class Matcher:
            self._patterns[key].append(specs)

    def remove(self, key):
-        """Remove a rule from the matcher.
-        A KeyError is raised if the key does not exist.
+        """Remove a rule from the matcher. A KeyError is raised if the key does
+        not exist.
+
+        key (unicode): The ID of the match rule.
        """
        key = self._normalize_key(key)
        self._patterns.pop(key)
--- a/website/docs/api/matcher.jade
+++ b/website/docs/api/matcher.jade
@ -54,7 +54,7 @@ p Find all token sequences matching the supplied patterns on the #[code Doc].

    matcher = Matcher(nlp.vocab)
    pattern = [{LOWER: "hello"}, {LOWER: "world"}]
-    matcher.add_pattern("HelloWorld", pattern, on_match=None)
+    matcher.add("HelloWorld", None, pattern)
    doc = nlp(u'hello world!')
    matches = matcher(doc)

@ -88,6 +88,12 @@ p Find all token sequences matching the supplied patterns on the #[code Doc].

 p Match a stream of documents, yielding them in turn.

+aside-code("Example").
+    from spacy.matcher import Matcher
+    matcher = Matcher(nlp.vocab)
+    for doc in matcher.pipe(docs, batch_size=50, n_threads=4):
+        pass
+
 +table(["Name", "Type", "Description"])
    +row
        +cell #[code docs]
@ -112,14 +118,60 @@ p Match a stream of documents, yielding them in turn.
        +cell #[code Doc]
        +cell Documents, in order.

-+h(2, "add_pattern") Matcher.add
+h(2, "len") Matcher.__len__
+    +tag method
+
+p Get the number of rules added to the matcher.
+
+aside-code("Example").
+    from spacy.matcher import Matcher
+    from spacy.attrs import ORTH
+
+    matcher = Matcher(nlp.vocab)
+    assert len(matcher) == 0
+    matcher.add('rule', None, [{ORTH: 'rule'}])
+    assert len(matcher) == 1
+
+table(["Name", "Type", "Description"])
+    +footrow
+        +cell returns
+        +cell int
+        +cell The number of rules.
+
+h(2, "contains") Matcher.__contains__
+    +tag method
+
+p Check whether the matcher contains rules for a match ID.
+
+aside-code("Example").
+    from spacy.matcher import Matcher
+    from spacy.attrs import ORTH
+
+    matcher = Matcher(nlp.vocab)
+    assert 'rule' not in matcher
+    matcher.add('rule', None, [{ORTH: 'rule'}])
+    assert 'rule' in matcher
+
+table(["Name", "Type", "Description"])
+    +row
+        +cell #[code key]
+        +cell unicode
+        +cell The match ID.
+    +footrow
+        +cell returns
+        +cell bool
+        +cell Whether the matcher contains rules for this match ID.
+
+h(2, "add") Matcher.add
    +tag method

 p
    |  Add a rule to the matcher, consisting of an ID key, one or more patterns, and
-    |  a callback function to act on the matches.
-    |  The callback function will receive the arguments
-    |  #[code matcher], #[code doc], #[code i] and #[code matches].
+    |  a callback function to act on the matches. The callback function will
+    |  receive the arguments #[code matcher], #[code doc], #[code i] and
+    |  #[code matches]. If a pattern already exists for the given ID, the
+    |  patterns will be extended. An #[code on_match] callback will be
+    |  overwritten.

 +aside-code("Example").
    from spacy.matcher import Matcher
@ -131,7 +183,6 @@ p
    matcher = Matcher(nlp.vocab)
    matcher.add('HelloWorld', on_match, [{LOWER: "hello"}, {LOWER: "world"}])
    matcher.add('GoogleMaps', on_match, [{ORTH: "Google"}, {ORTH: "Maps"}])
-
    doc = nlp(u'HELLO WORLD on Google Maps.')
    matches = matcher(doc)

@ -143,7 +194,7 @@ p

    +row
        +cell #[code on_match]
-        +cell function
+        +cell function or #[code None]
        +cell
            |  Callback function to act on matches. Takes the arguments
            |  #[code matcher], #[code doc], #[code i] and #[code matches].
@ -154,3 +205,26 @@ p
        +cell
            |  Match pattern. A pattern consists of a list of dicts, where each
            |  dict describes a token.
+
+h(2, "remove") Matcher.remove
+    +tag method
+
+p
+    |  Remove a rule from the matcher. A #[code KeyError] is raised if the match
+    |  ID does not exist.
+
+aside-code("Example").
+    from spacy.matcher import Matcher
+    from spacy.attrs import ORTH
+
+    matcher = Matcher(nlp.vocab)
+    matcher.add('rule', None, [{ORTH: 'rule'}])
+    assert 'rule' in matcher
+    matcher.remove('rule')
+    assert 'rule' not in matcher
+
+table(["Name", "Type", "Description"])
+    +row
+        +cell #[code key]
+        +cell unicode
+        +cell The ID of the match rule.