Update docstrings and API docs for Matcher

This commit is contained in:
ines 2017-05-20 14:26:10 +02:00
parent 790435e51c
commit c00ff257be
2 changed files with 97 additions and 12 deletions

View File

@ -199,9 +199,18 @@ cdef class Matcher:
return (self.__class__, (self.vocab, self._patterns), None, None) return (self.__class__, (self.vocab, self._patterns), None, None)
def __len__(self): def __len__(self):
"""Get the number of rules added to the matcher.
RETURNS (int): The number of rules.
"""
return len(self._patterns) return len(self._patterns)
def __contains__(self, key): def __contains__(self, key):
"""Check whether the matcher contains rules for a match ID.
key (unicode): The match ID.
RETURNS (bool): Whether the matcher contains rules for this match ID.
"""
return len(self._patterns) return len(self._patterns)
def add(self, key, on_match, *patterns): def add(self, key, on_match, *patterns):
@ -209,9 +218,9 @@ cdef class Matcher:
A match-rule consists of: an ID key, an on_match callback, and one or A match-rule consists of: an ID key, an on_match callback, and one or
more patterns. If the key exists, the patterns are appended to the more patterns. If the key exists, the patterns are appended to the
previous ones, and the previous on_match callback is replaced. The previous ones, and the previous on_match callback is replaced. The
`on_match` callback will receive the arguments `(matcher, doc, i, matches)`. `on_match` callback will receive the arguments `(matcher, doc, i,
Note that if no `on_match` callback is specified, the document will not matches)`. You can also set `on_match` to `None` to not perform any
be modified. A pattern consists of one or more `token_specs`, where a actions. A pattern consists of one or more `token_specs`, where a
`token_spec` is a dictionary mapping attribute IDs to values. Token `token_spec` is a dictionary mapping attribute IDs to values. Token
descriptors can also include quantifiers. There are currently important descriptors can also include quantifiers. There are currently important
known problems with the quantifiers see the docs. known problems with the quantifiers see the docs.
@ -231,8 +240,10 @@ cdef class Matcher:
self._patterns[key].append(specs) self._patterns[key].append(specs)
def remove(self, key): def remove(self, key):
"""Remove a rule from the matcher. """Remove a rule from the matcher. A KeyError is raised if the key does
A KeyError is raised if the key does not exist. not exist.
key (unicode): The ID of the match rule.
""" """
key = self._normalize_key(key) key = self._normalize_key(key)
self._patterns.pop(key) self._patterns.pop(key)

View File

@ -54,7 +54,7 @@ p Find all token sequences matching the supplied patterns on the #[code Doc].
matcher = Matcher(nlp.vocab) matcher = Matcher(nlp.vocab)
pattern = [{LOWER: "hello"}, {LOWER: "world"}] pattern = [{LOWER: "hello"}, {LOWER: "world"}]
matcher.add_pattern("HelloWorld", pattern, on_match=None) matcher.add("HelloWorld", None, pattern)
doc = nlp(u'hello world!') doc = nlp(u'hello world!')
matches = matcher(doc) matches = matcher(doc)
@ -88,6 +88,12 @@ p Find all token sequences matching the supplied patterns on the #[code Doc].
p Match a stream of documents, yielding them in turn. p Match a stream of documents, yielding them in turn.
+aside-code("Example").
from spacy.matcher import Matcher
matcher = Matcher(nlp.vocab)
for doc in matcher.pipe(texts, batch_size=50, n_threads=4):
pass
+table(["Name", "Type", "Description"]) +table(["Name", "Type", "Description"])
+row +row
+cell #[code docs] +cell #[code docs]
@ -112,14 +118,60 @@ p Match a stream of documents, yielding them in turn.
+cell #[code Doc] +cell #[code Doc]
+cell Documents, in order. +cell Documents, in order.
+h(2, "add_pattern") Matcher.add +h(2, "len") Matcher.__len__
+tag method
p Get the number of rules added to the matcher.
+aside-code("Example").
from spacy.matcher import Matcher
from spacy.attrs import ORTH
matcher = Matcher(nlp.vocab)
assert len(matcher) == 0
matcher.add('rule', None, [{ORTH: 'rule'}])
assert len(matcher) == 1
+table(["Name", "Type", "Description"])
+footrow
+cell returns
+cell int
+cell The number of rules.
+h(2, "contains") Matcher.__contains__
+tag method
p Check whether the matcher contains rules for a match ID.
+aside-code("Example").
from spacy.matcher import Matcher
from spacy.attrs import ORTH
matcher = Matcher(nlp.vocab)
assert 'rule' not in matcher
matcher.add('rule', None, [{ORTH: 'rule'}])
assert 'rule' in matcher
+table(["Name", "Type", "Description"])
+row
+cell #[code key]
+cell unicode
+cell The match ID.
+footrow
+cell returns
+cell bool
+cell Whether the matcher contains rules for this match ID.
+h(2, "add") Matcher.add
+tag method +tag method
p p
| Add a rule to the matcher, consisting of an ID key, one or more patterns, and | Add a rule to the matcher, consisting of an ID key, one or more patterns, and
| a callback function to act on the matches. | a callback function to act on the matches. The callback function will
| The callback function will receive the arguments | receive the arguments #[code matcher], #[code doc], #[code i] and
| #[code matcher], #[code doc], #[code i] and #[code matches]. | #[code matches]. If a pattern already exists for the given ID, the
| patterns will be extended. An existing #[code on_match] callback will be
| overwritten by the new one.
+aside-code("Example"). +aside-code("Example").
from spacy.matcher import Matcher from spacy.matcher import Matcher
@ -131,7 +183,6 @@ p
matcher = Matcher(nlp.vocab) matcher = Matcher(nlp.vocab)
matcher.add('HelloWorld', on_match, [{LOWER: "hello"}, {LOWER: "world"}]) matcher.add('HelloWorld', on_match, [{LOWER: "hello"}, {LOWER: "world"}])
matcher.add('GoogleMaps', on_match, [{ORTH: "Google"}, {ORTH: "Maps"}]) matcher.add('GoogleMaps', on_match, [{ORTH: "Google"}, {ORTH: "Maps"}])
doc = nlp(u'HELLO WORLD on Google Maps.') doc = nlp(u'HELLO WORLD on Google Maps.')
matches = matcher(doc) matches = matcher(doc)
@ -143,7 +194,7 @@ p
+row +row
+cell #[code on_match] +cell #[code on_match]
+cell function +cell function or #[code None]
+cell +cell
| Callback function to act on matches. Takes the arguments | Callback function to act on matches. Takes the arguments
| #[code matcher], #[code doc], #[code i] and #[code matches]. | #[code matcher], #[code doc], #[code i] and #[code matches].
@ -154,3 +205,26 @@ p
+cell +cell
| Match pattern. A pattern consists of a list of dicts, where each | Match pattern. A pattern consists of a list of dicts, where each
| dict describes a token. | dict describes a token.
+h(2, "remove") Matcher.remove
+tag method
p
| Remove a rule from the matcher. A #[code KeyError] is raised if the match
| ID does not exist.
+aside-code("Example").
from spacy.matcher import Matcher
from spacy.attrs import ORTH
matcher = Matcher(nlp.vocab)
matcher.add('rule', None, [{ORTH: 'rule'}])
assert 'rule' in matcher
matcher.remove('rule')
assert 'rule' not in matcher
+table(["Name", "Type", "Description"])
+row
+cell #[code key]
+cell unicode
+cell The ID of the match rule.