mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-08 00:06:37 +03:00
180 lines
4.2 KiB
Plaintext
180 lines
4.2 KiB
Plaintext
//- 💫 DOCS > API > MATCHER
|
|
|
|
include ../../_includes/_mixins
|
|
|
|
p Match sequences of tokens, based on pattern rules.
|
|
|
|
+h(2, "load") Matcher.load
|
|
+tag classmethod
|
|
|
|
p Load the matcher and patterns from a file path.
|
|
|
|
+table(["Name", "Type", "Description"])
|
|
+row
|
|
+cell #[code path]
|
|
+cell #[code Path]
|
|
+cell Path to a JSON-formatted patterns file.
|
|
|
|
+row
|
|
+cell #[code vocab]
|
|
+cell #[code Vocab]
|
|
+cell The vocabulary that the documents to match over will refer to.
|
|
|
|
+footrow
|
|
+cell return
|
|
+cell #[code Matcher]
|
|
+cell The newly constructed object.
|
|
|
|
+h(2, "init") Matcher.__init__
|
|
+tag method
|
|
|
|
p Create the Matcher.
|
|
|
|
+table(["Name", "Type", "Description"])
|
|
+row
|
|
+cell #[code vocab]
|
|
+cell #[code Vocab]
|
|
+cell
|
|
| The vocabulary object, which must be shared with the documents
|
|
| the matcher will operate on.
|
|
|
|
+row
|
|
+cell #[code patterns]
|
|
+cell dict
|
|
+cell Patterns to add to the matcher.
|
|
|
|
+footrow
|
|
+cell return
|
|
+cell #[code Matcher]
|
|
+cell The newly constructed object.
|
|
|
|
+h(2, "call") Matcher.__call__
|
|
+tag method
|
|
|
|
p Find all token sequences matching the supplied patterns on the Doc.
|
|
|
|
+table(["Name", "Type", "Description"])
|
|
+row
|
|
+cell #[code doc]
|
|
+cell #[code Doc]
|
|
+cell The document to match over.
|
|
|
|
+footrow
|
|
+cell return
|
|
+cell list
|
|
+cell
|
|
| A list of #[code (entity_key, label_id, start, end)] tuples,
|
|
| describing the matches. A match tuple describes a
|
|
| span #[code doc[start:end]]. The #[code label_id] and
|
|
| #[code entity_key] are both integers.
|
|
|
|
+h(2, "pipe") Matcher.pipe
|
|
+tag method
|
|
|
|
p Match a stream of documents, yielding them in turn.
|
|
|
|
+table(["Name", "Type", "Description"])
|
|
+row
|
|
+cell #[code docs]
|
|
+cell -
|
|
+cell A stream of documents.
|
|
|
|
+row
|
|
+cell #[code batch_size]
|
|
+cell int
|
|
+cell The number of documents to accumulate into a working set.
|
|
|
|
+row
|
|
+cell #[code n_threads]
|
|
+cell int
|
|
+cell
|
|
| The number of threads with which to work on the buffer in
|
|
| parallel, if the #[code Matcher] implementation supports
|
|
| multi-threading.
|
|
|
|
+footrow
|
|
+cell yield
|
|
+cell #[code Doc]
|
|
+cell Documents, in order.
|
|
|
|
+h(2, "add_entity") Matcher.add_entity
|
|
+tag method
|
|
|
|
p Add an entity to the matcher.
|
|
|
|
+table(["Name", "Type", "Description"])
|
|
+row
|
|
+cell #[code entity_key]
|
|
+cell unicode / int
|
|
+cell An ID for the entity.
|
|
|
|
+row
|
|
+cell #[code attrs]
|
|
+cell -
|
|
+cell Attributes to associate with the entity.
|
|
|
|
+row
|
|
+cell #[code if_exists]
|
|
+cell unicode
|
|
+cell
|
|
| #[code 'raise'], #[code 'ignore'] or #[code 'update']. Controls
|
|
| what happens if the entity ID already exists. Defaults to
|
|
| #[code 'raise'].
|
|
|
|
+row
|
|
+cell #[code acceptor]
|
|
+cell -
|
|
+cell Callback function to filter matches of the entity.
|
|
|
|
+row
|
|
+cell #[code on_match]
|
|
+cell -
|
|
+cell Callback function to act on matches of the entity.
|
|
|
|
+footrow
|
|
+cell return
|
|
+cell #[code None]
|
|
+cell -
|
|
|
|
+h(2, "add_pattern") Matcher.add_pattern
|
|
+tag method
|
|
|
|
p Add a pattern to the matcher.
|
|
|
|
+table(["Name", "Type", "Description"])
|
|
+row
|
|
+cell #[code entity_key]
|
|
+cell unicode / int
|
|
+cell An ID for the entity.
|
|
|
|
+row
|
|
+cell #[code token_specs]
|
|
+cell -
|
|
+cell Description of the pattern to be matched.
|
|
|
|
+row
|
|
+cell #[code label]
|
|
+cell unicode / int
|
|
+cell Label to assign to the matched pattern. Defaults to #[code ""].
|
|
|
|
+footrow
|
|
+cell return
|
|
+cell #[code None]
|
|
+cell -
|
|
|
|
+h(2, "has_entity") Matcher.has_entity
|
|
+tag method
|
|
|
|
p Check whether the matcher has an entity.
|
|
|
|
+table(["Name", "Type", "Description"])
|
|
+row
|
|
+cell #[code entity_key]
|
|
+cell unicode / int
|
|
+cell The entity key to check.
|
|
|
|
+footrow
|
|
+cell return
|
|
+cell bool
|
|
+cell Whether the matcher has the entity.
|