mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 02:06:31 +03:00
Update docstrings and API docs for Matcher
This commit is contained in:
parent
790435e51c
commit
c00ff257be
|
@ -199,9 +199,18 @@ cdef class Matcher:
|
||||||
return (self.__class__, (self.vocab, self._patterns), None, None)
|
return (self.__class__, (self.vocab, self._patterns), None, None)
|
||||||
|
|
||||||
def __len__(self):
|
def __len__(self):
|
||||||
|
"""Get the number of rules added to the matcher.
|
||||||
|
|
||||||
|
RETURNS (int): The number of rules.
|
||||||
|
"""
|
||||||
return len(self._patterns)
|
return len(self._patterns)
|
||||||
|
|
||||||
def __contains__(self, key):
|
def __contains__(self, key):
|
||||||
|
"""Check whether the matcher contains rules for a match ID.
|
||||||
|
|
||||||
|
key (unicode): The match ID.
|
||||||
|
RETURNS (bool): Whether the matcher contains rules for this match ID.
|
||||||
|
"""
|
||||||
return self._normalize_key(key) in self._patterns
|
return self._normalize_key(key) in self._patterns
|
||||||
|
|
||||||
def add(self, key, on_match, *patterns):
|
def add(self, key, on_match, *patterns):
|
||||||
|
@ -209,9 +218,9 @@ cdef class Matcher:
|
||||||
A match-rule consists of: an ID key, an on_match callback, and one or
|
A match-rule consists of: an ID key, an on_match callback, and one or
|
||||||
more patterns. If the key exists, the patterns are appended to the
|
more patterns. If the key exists, the patterns are appended to the
|
||||||
previous ones, and the previous on_match callback is replaced. The
|
previous ones, and the previous on_match callback is replaced. The
|
||||||
`on_match` callback will receive the arguments `(matcher, doc, i, matches)`.
|
`on_match` callback will receive the arguments `(matcher, doc, i,
|
||||||
Note that if no `on_match` callback is specified, the document will not
|
matches)`. You can also set `on_match` to `None` to not perform any
|
||||||
be modified. A pattern consists of one or more `token_specs`, where a
|
actions. A pattern consists of one or more `token_specs`, where a
|
||||||
`token_spec` is a dictionary mapping attribute IDs to values. Token
|
`token_spec` is a dictionary mapping attribute IDs to values. Token
|
||||||
descriptors can also include quantifiers. There are currently important
|
descriptors can also include quantifiers. There are currently important
|
||||||
known problems with the quantifiers – see the docs.
|
known problems with the quantifiers – see the docs.
|
||||||
|
@ -231,8 +240,10 @@ cdef class Matcher:
|
||||||
self._patterns[key].append(specs)
|
self._patterns[key].append(specs)
|
||||||
|
|
||||||
def remove(self, key):
|
def remove(self, key):
|
||||||
"""Remove a rule from the matcher.
|
"""Remove a rule from the matcher. A KeyError is raised if the key does
|
||||||
A KeyError is raised if the key does not exist.
|
not exist.
|
||||||
|
|
||||||
|
key (unicode): The ID of the match rule.
|
||||||
"""
|
"""
|
||||||
key = self._normalize_key(key)
|
key = self._normalize_key(key)
|
||||||
self._patterns.pop(key)
|
self._patterns.pop(key)
|
||||||
|
|
|
@ -54,7 +54,7 @@ p Find all token sequences matching the supplied patterns on the #[code Doc].
|
||||||
|
|
||||||
matcher = Matcher(nlp.vocab)
|
matcher = Matcher(nlp.vocab)
|
||||||
pattern = [{LOWER: "hello"}, {LOWER: "world"}]
|
pattern = [{LOWER: "hello"}, {LOWER: "world"}]
|
||||||
matcher.add_pattern("HelloWorld", pattern, on_match=None)
|
matcher.add("HelloWorld", None, pattern)
|
||||||
doc = nlp(u'hello world!')
|
doc = nlp(u'hello world!')
|
||||||
matches = matcher(doc)
|
matches = matcher(doc)
|
||||||
|
|
||||||
|
@ -88,6 +88,12 @@ p Find all token sequences matching the supplied patterns on the #[code Doc].
|
||||||
|
|
||||||
p Match a stream of documents, yielding them in turn.
|
p Match a stream of documents, yielding them in turn.
|
||||||
|
|
||||||
|
+aside-code("Example").
|
||||||
|
from spacy.matcher import Matcher
|
||||||
|
matcher = Matcher(nlp.vocab)
|
||||||
|
for doc in matcher.pipe(docs, batch_size=50, n_threads=4):
|
||||||
|
pass
|
||||||
|
|
||||||
+table(["Name", "Type", "Description"])
|
+table(["Name", "Type", "Description"])
|
||||||
+row
|
+row
|
||||||
+cell #[code docs]
|
+cell #[code docs]
|
||||||
|
@ -112,14 +118,60 @@ p Match a stream of documents, yielding them in turn.
|
||||||
+cell #[code Doc]
|
+cell #[code Doc]
|
||||||
+cell Documents, in order.
|
+cell Documents, in order.
|
||||||
|
|
||||||
+h(2, "add_pattern") Matcher.add
|
+h(2, "len") Matcher.__len__
|
||||||
|
+tag method
|
||||||
|
|
||||||
|
p Get the number of rules added to the matcher.
|
||||||
|
|
||||||
|
+aside-code("Example").
|
||||||
|
from spacy.matcher import Matcher
|
||||||
|
from spacy.attrs import ORTH
|
||||||
|
|
||||||
|
matcher = Matcher(nlp.vocab)
|
||||||
|
assert len(matcher) == 0
|
||||||
|
matcher.add('rule', None, [{ORTH: 'rule'}])
|
||||||
|
assert len(matcher) == 1
|
||||||
|
|
||||||
|
+table(["Name", "Type", "Description"])
|
||||||
|
+footrow
|
||||||
|
+cell returns
|
||||||
|
+cell int
|
||||||
|
+cell The number of rules.
|
||||||
|
|
||||||
|
+h(2, "contains") Matcher.__contains__
|
||||||
|
+tag method
|
||||||
|
|
||||||
|
p Check whether the matcher contains rules for a match ID.
|
||||||
|
|
||||||
|
+aside-code("Example").
|
||||||
|
from spacy.matcher import Matcher
|
||||||
|
from spacy.attrs import ORTH
|
||||||
|
|
||||||
|
matcher = Matcher(nlp.vocab)
|
||||||
|
assert 'rule' not in matcher
|
||||||
|
matcher.add('rule', None, [{ORTH: 'rule'}])
|
||||||
|
assert 'rule' in matcher
|
||||||
|
|
||||||
|
+table(["Name", "Type", "Description"])
|
||||||
|
+row
|
||||||
|
+cell #[code key]
|
||||||
|
+cell unicode
|
||||||
|
+cell The match ID.
|
||||||
|
+footrow
|
||||||
|
+cell returns
|
||||||
|
+cell bool
|
||||||
|
+cell Whether the matcher contains rules for this match ID.
|
||||||
|
|
||||||
|
+h(2, "add") Matcher.add
|
||||||
+tag method
|
+tag method
|
||||||
|
|
||||||
p
|
p
|
||||||
| Add a rule to the matcher, consisting of an ID key, one or more patterns, and
|
| Add a rule to the matcher, consisting of an ID key, one or more patterns, and
|
||||||
| a callback function to act on the matches.
|
| a callback function to act on the matches. The callback function will
|
||||||
| The callback function will receive the arguments
|
| receive the arguments #[code matcher], #[code doc], #[code i] and
|
||||||
| #[code matcher], #[code doc], #[code i] and #[code matches].
|
| #[code matches]. If a pattern already exists for the given ID, the
|
||||||
|
| patterns will be extended. Any existing #[code on_match] callback will be
|
||||||
|
| replaced by the new one.
|
||||||
|
|
||||||
+aside-code("Example").
|
+aside-code("Example").
|
||||||
from spacy.matcher import Matcher
|
from spacy.matcher import Matcher
|
||||||
|
@ -131,7 +183,6 @@ p
|
||||||
matcher = Matcher(nlp.vocab)
|
matcher = Matcher(nlp.vocab)
|
||||||
matcher.add('HelloWorld', on_match, [{LOWER: "hello"}, {LOWER: "world"}])
|
matcher.add('HelloWorld', on_match, [{LOWER: "hello"}, {LOWER: "world"}])
|
||||||
matcher.add('GoogleMaps', on_match, [{ORTH: "Google"}, {ORTH: "Maps"}])
|
matcher.add('GoogleMaps', on_match, [{ORTH: "Google"}, {ORTH: "Maps"}])
|
||||||
|
|
||||||
doc = nlp(u'HELLO WORLD on Google Maps.')
|
doc = nlp(u'HELLO WORLD on Google Maps.')
|
||||||
matches = matcher(doc)
|
matches = matcher(doc)
|
||||||
|
|
||||||
|
@ -143,7 +194,7 @@ p
|
||||||
|
|
||||||
+row
|
+row
|
||||||
+cell #[code on_match]
|
+cell #[code on_match]
|
||||||
+cell function
|
+cell function or #[code None]
|
||||||
+cell
|
+cell
|
||||||
| Callback function to act on matches. Takes the arguments
|
| Callback function to act on matches. Takes the arguments
|
||||||
| #[code matcher], #[code doc], #[code i] and #[code matches].
|
| #[code matcher], #[code doc], #[code i] and #[code matches].
|
||||||
|
@ -154,3 +205,26 @@ p
|
||||||
+cell
|
+cell
|
||||||
| Match pattern. A pattern consists of a list of dicts, where each
|
| Match pattern. A pattern consists of a list of dicts, where each
|
||||||
| dict describes a token.
|
| dict describes a token.
|
||||||
|
|
||||||
|
+h(2, "remove") Matcher.remove
|
||||||
|
+tag method
|
||||||
|
|
||||||
|
p
|
||||||
|
| Remove a rule from the matcher. A #[code KeyError] is raised if the match
|
||||||
|
| ID does not exist.
|
||||||
|
|
||||||
|
+aside-code("Example").
|
||||||
|
from spacy.matcher import Matcher
|
||||||
|
from spacy.attrs import ORTH
|
||||||
|
|
||||||
|
matcher = Matcher(nlp.vocab)
|
||||||
|
matcher.add('rule', None, [{ORTH: 'rule'}])
|
||||||
|
assert 'rule' in matcher
|
||||||
|
matcher.remove('rule')
|
||||||
|
assert 'rule' not in matcher
|
||||||
|
|
||||||
|
+table(["Name", "Type", "Description"])
|
||||||
|
+row
|
||||||
|
+cell #[code key]
|
||||||
|
+cell unicode
|
||||||
|
+cell The ID of the match rule.
|
||||||
|
|
Loading…
Reference in New Issue
Block a user