mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-11 17:56:30 +03:00
Update docstrings and API docs for Matcher
This commit is contained in:
parent
790435e51c
commit
c00ff257be
|
@ -199,9 +199,18 @@ cdef class Matcher:
|
|||
return (self.__class__, (self.vocab, self._patterns), None, None)
|
||||
|
||||
def __len__(self):
|
||||
"""Get the number of rules added to the matcher.
|
||||
|
||||
RETURNS (int): The number of rules.
|
||||
"""
|
||||
return len(self._patterns)
|
||||
|
||||
def __contains__(self, key):
|
||||
"""Check whether the matcher contains rules for a match ID.
|
||||
|
||||
key (unicode): The match ID.
|
||||
RETURNS (bool): Whether the matcher contains rules for this match ID.
|
||||
"""
|
||||
return self._normalize_key(key) in self._patterns
|
||||
|
||||
def add(self, key, on_match, *patterns):
|
||||
|
@ -209,9 +218,9 @@ cdef class Matcher:
|
|||
A match-rule consists of: an ID key, an on_match callback, and one or
|
||||
more patterns. If the key exists, the patterns are appended to the
|
||||
previous ones, and the previous on_match callback is replaced. The
|
||||
`on_match` callback will receive the arguments `(matcher, doc, i, matches)`.
|
||||
Note that if no `on_match` callback is specified, the document will not
|
||||
be modified. A pattern consists of one or more `token_specs`, where a
|
||||
`on_match` callback will receive the arguments `(matcher, doc, i,
|
||||
matches)`. You can also set `on_match` to `None` to not perform any
|
||||
actions. A pattern consists of one or more `token_specs`, where a
|
||||
`token_spec` is a dictionary mapping attribute IDs to values. Token
|
||||
descriptors can also include quantifiers. There are currently important
|
||||
known problems with the quantifiers – see the docs.
|
||||
|
@ -231,8 +240,10 @@ cdef class Matcher:
|
|||
self._patterns[key].append(specs)
|
||||
|
||||
def remove(self, key):
|
||||
"""Remove a rule from the matcher.
|
||||
A KeyError is raised if the key does not exist.
|
||||
"""Remove a rule from the matcher. A KeyError is raised if the key does
|
||||
not exist.
|
||||
|
||||
key (unicode): The ID of the match rule.
|
||||
"""
|
||||
key = self._normalize_key(key)
|
||||
self._patterns.pop(key)
|
||||
|
|
|
@ -54,7 +54,7 @@ p Find all token sequences matching the supplied patterns on the #[code Doc].
|
|||
|
||||
matcher = Matcher(nlp.vocab)
|
||||
pattern = [{LOWER: "hello"}, {LOWER: "world"}]
|
||||
matcher.add_pattern("HelloWorld", pattern, on_match=None)
|
||||
matcher.add("HelloWorld", None, pattern)
|
||||
doc = nlp(u'hello world!')
|
||||
matches = matcher(doc)
|
||||
|
||||
|
@ -88,6 +88,12 @@ p Find all token sequences matching the supplied patterns on the #[code Doc].
|
|||
|
||||
p Match a stream of documents, yielding them in turn.
|
||||
|
||||
+aside-code("Example").
|
||||
from spacy.matcher import Matcher
|
||||
matcher = Matcher(nlp.vocab)
|
||||
for doc in matcher.pipe(texts, batch_size=50, n_threads=4):
|
||||
pass
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+row
|
||||
+cell #[code docs]
|
||||
|
@ -112,14 +118,60 @@ p Match a stream of documents, yielding them in turn.
|
|||
+cell #[code Doc]
|
||||
+cell Documents, in order.
|
||||
|
||||
+h(2, "add_pattern") Matcher.add
|
||||
+h(2, "len") Matcher.__len__
|
||||
+tag method
|
||||
|
||||
p Get the number of rules added to the matcher.
|
||||
|
||||
+aside-code("Example").
|
||||
from spacy.matcher import Matcher
|
||||
from spacy.attrs import ORTH
|
||||
|
||||
matcher = Matcher(nlp.vocab)
|
||||
assert len(matcher) == 0
|
||||
matcher.add('rule', None, [{ORTH: 'rule'}])
|
||||
assert len(matcher) == 1
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+footrow
|
||||
+cell returns
|
||||
+cell int
|
||||
+cell The number of rules.
|
||||
|
||||
+h(2, "contains") Matcher.__contains__
|
||||
+tag method
|
||||
|
||||
p Check whether the matcher contains rules for a match ID.
|
||||
|
||||
+aside-code("Example").
|
||||
from spacy.matcher import Matcher
|
||||
from spacy.attrs import ORTH
|
||||
|
||||
matcher = Matcher(nlp.vocab)
|
||||
assert 'rule' not in matcher
|
||||
matcher.add('rule', None, [{ORTH: 'rule'}])
|
||||
assert 'rule' in matcher
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+row
|
||||
+cell #[code key]
|
||||
+cell unicode
|
||||
+cell The match ID.
|
||||
+footrow
|
||||
+cell returns
|
||||
+cell bool
|
||||
+cell Whether the matcher contains rules for this match ID.
|
||||
|
||||
+h(2, "add") Matcher.add
|
||||
+tag method
|
||||
|
||||
p
|
||||
| Add a rule to the matcher, consisting of an ID key, one or more patterns, and
|
||||
| a callback function to act on the matches.
|
||||
| The callback function will receive the arguments
|
||||
| #[code matcher], #[code doc], #[code i] and #[code matches].
|
||||
| a callback function to act on the matches. The callback function will
|
||||
| receive the arguments #[code matcher], #[code doc], #[code i] and
|
||||
| #[code matches]. If a pattern already exists for the given ID, the
|
||||
| patterns will be extended. An #[code on_match] callback will be
|
||||
| overwritten.
|
||||
|
||||
+aside-code("Example").
|
||||
from spacy.matcher import Matcher
|
||||
|
@ -131,7 +183,6 @@ p
|
|||
matcher = Matcher(nlp.vocab)
|
||||
matcher.add('HelloWorld', on_match, [{LOWER: "hello"}, {LOWER: "world"}])
|
||||
matcher.add('GoogleMaps', on_match, [{ORTH: "Google"}, {ORTH: "Maps"}])
|
||||
|
||||
doc = nlp(u'HELLO WORLD on Google Maps.')
|
||||
matches = matcher(doc)
|
||||
|
||||
|
@ -143,7 +194,7 @@ p
|
|||
|
||||
+row
|
||||
+cell #[code on_match]
|
||||
+cell function
|
||||
+cell function or #[code None]
|
||||
+cell
|
||||
| Callback function to act on matches. Takes the arguments
|
||||
| #[code matcher], #[code doc], #[code i] and #[code matches].
|
||||
|
@ -154,3 +205,26 @@ p
|
|||
+cell
|
||||
| Match pattern. A pattern consists of a list of dicts, where each
|
||||
| dict describes a token.
|
||||
|
||||
+h(2, "remove") Matcher.remove
|
||||
+tag method
|
||||
|
||||
p
|
||||
| Remove a rule from the matcher. A #[code KeyError] is raised if the match
|
||||
| ID does not exist.
|
||||
|
||||
+aside-code("Example").
|
||||
from spacy.matcher import Matcher
|
||||
from spacy.attrs import ORTH
|
||||
|
||||
matcher = Matcher(nlp.vocab)
|
||||
matcher.add('rule', None, [{ORTH: 'rule'}])
|
||||
assert 'rule' in matcher
|
||||
matcher.remove('rule')
|
||||
assert 'rule' not in matcher
|
||||
|
||||
+table(["Name", "Type", "Description"])
|
||||
+row
|
||||
+cell #[code key]
|
||||
+cell unicode
|
||||
+cell The ID of the match rule.
|
||||
|
|
Loading…
Reference in New Issue
Block a user