Update docs and fix consistency

Ines Montani 2020-08-09 22:31:52 +02:00
parent 7c6854d8d4
commit d5c78c7a34
10 changed files with 326 additions and 54 deletions

View File

@ -35,7 +35,7 @@ def pretrain_cli(
config_path: Path = Arg(..., help="Path to config file", exists=True, dir_okay=False),
code_path: Optional[Path] = Opt(None, "--code-path", "-c", help="Path to Python file with additional code (registered functions) to be imported"),
resume_path: Optional[Path] = Opt(None, "--resume-path", "-r", help="Path to pretrained weights from which to resume pretraining"),
epoch_resume: Optional[int] = Opt(None, "--epoch-resume", "-er", help="The epoch to resume counting from when using '--resume_path'. Prevents unintended overwriting of existing weight files."),
epoch_resume: Optional[int] = Opt(None, "--epoch-resume", "-er", help="The epoch to resume counting from when using --resume-path. Prevents unintended overwriting of existing weight files."),
use_gpu: int = Opt(-1, "--gpu-id", "-g", help="GPU ID or -1 for CPU"),
# fmt: on
):

View File

@ -68,11 +68,11 @@ cdef class DependencyMatcher:
key (str): The match ID.
RETURNS (bool): Whether the matcher contains rules for this match ID.
"""
return self._normalize_key(key) in self._patterns
return self.has_key(key)
def validateInput(self, pattern, key):
def validate_input(self, pattern, key):
idx = 0
visitedNodes = {}
visited_nodes = {}
for relation in pattern:
if "PATTERN" not in relation or "SPEC" not in relation:
raise ValueError(Errors.E098.format(key=key))
@ -83,7 +83,7 @@ cdef class DependencyMatcher:
and "NBOR_NAME" not in relation["SPEC"]
):
raise ValueError(Errors.E099.format(key=key))
visitedNodes[relation["SPEC"]["NODE_NAME"]] = True
visited_nodes[relation["SPEC"]["NODE_NAME"]] = True
else:
if not(
"NODE_NAME" in relation["SPEC"]
@ -92,22 +92,28 @@ cdef class DependencyMatcher:
):
raise ValueError(Errors.E100.format(key=key))
if (
relation["SPEC"]["NODE_NAME"] in visitedNodes
or relation["SPEC"]["NBOR_NAME"] not in visitedNodes
relation["SPEC"]["NODE_NAME"] in visited_nodes
or relation["SPEC"]["NBOR_NAME"] not in visited_nodes
):
raise ValueError(Errors.E101.format(key=key))
visitedNodes[relation["SPEC"]["NODE_NAME"]] = True
visitedNodes[relation["SPEC"]["NBOR_NAME"]] = True
visited_nodes[relation["SPEC"]["NODE_NAME"]] = True
visited_nodes[relation["SPEC"]["NBOR_NAME"]] = True
idx = idx + 1
def add(self, key, patterns, *_patterns, on_match=None):
"""Add a new matcher rule to the matcher.
key (str): The match ID.
patterns (list): The patterns to add for the given key.
on_match (callable): Optional callback executed on match.
"""
if patterns is None or hasattr(patterns, "__call__"): # old API
on_match = patterns
patterns = _patterns
for pattern in patterns:
if len(pattern) == 0:
raise ValueError(Errors.E012.format(key=key))
self.validateInput(pattern,key)
self.validate_input(pattern,key)
key = self._normalize_key(key)
_patterns = []
for pattern in patterns:
@ -187,8 +193,7 @@ cdef class DependencyMatcher:
key (string or int): The key to check.
RETURNS (bool): Whether the matcher has the rule.
"""
key = self._normalize_key(key)
return key in self._patterns
return self._normalize_key(key) in self._patterns
def get(self, key, default=None):
"""Retrieve the pattern stored for a key.
@ -202,6 +207,13 @@ cdef class DependencyMatcher:
return (self._callbacks[key], self._patterns[key])
def __call__(self, Doc doc):
"""Find all token sequences matching the supplied pattern.
doclike (Doc or Span): The document to match over.
RETURNS (list): A list of `(key, start, end)` tuples,
describing the matches. A match tuple describes a span
`doc[start:end]`. The `label_id` and `key` are both integers.
"""
matched_key_trees = []
matches = self.token_matcher(doc)
for key in list(self._patterns.keys()):
@ -241,25 +253,25 @@ cdef class DependencyMatcher:
on_match(self, doc, i, matched_key_trees)
return matched_key_trees
def recurse(self,tree,id_to_position,_node_operator_map,int patternLength,visitedNodes,matched_trees):
def recurse(self,tree,id_to_position,_node_operator_map,int patternLength,visited_nodes,matched_trees):
cdef bool isValid;
if(patternLength == len(id_to_position.keys())):
isValid = True
for node in range(patternLength):
if(node in tree):
for idx, (relop,nbor) in enumerate(tree[node]):
computed_nbors = numpy.asarray(_node_operator_map[visitedNodes[node]][relop])
computed_nbors = numpy.asarray(_node_operator_map[visited_nodes[node]][relop])
isNbor = False
for computed_nbor in computed_nbors:
if(computed_nbor.i == visitedNodes[nbor]):
if(computed_nbor.i == visited_nodes[nbor]):
isNbor = True
isValid = isValid & isNbor
if(isValid):
matched_trees.append(visitedNodes)
matched_trees.append(visited_nodes)
return
allPatternNodes = numpy.asarray(id_to_position[patternLength])
for patternNode in allPatternNodes:
self.recurse(tree,id_to_position,_node_operator_map,patternLength+1,visitedNodes+[patternNode],matched_trees)
self.recurse(tree,id_to_position,_node_operator_map,patternLength+1,visited_nodes+[patternNode],matched_trees)
# Given a node and an edge operator, to return the list of nodes
# from the doc that belong to node+operator. This is used to store

View File

@ -70,7 +70,7 @@ cdef class Matcher:
key (str): The match ID.
RETURNS (bool): Whether the matcher contains rules for this match ID.
"""
return self._normalize_key(key) in self._patterns
return self.has_key(key)
def add(self, key, patterns, *, on_match=None, greedy: str=None):
"""Add a match-rule to the matcher. A match-rule consists of: an ID
@ -162,8 +162,7 @@ cdef class Matcher:
key (string or int): The key to check.
RETURNS (bool): Whether the matcher has the rule.
"""
key = self._normalize_key(key)
return key in self._patterns
return self._normalize_key(key) in self._patterns
def get(self, key, default=None):
"""Retrieve the pattern stored for a key.
@ -179,7 +178,7 @@ cdef class Matcher:
def pipe(self, docs, batch_size=1000, return_matches=False, as_tuples=False):
"""Match a stream of documents, yielding them in turn.
docs (iterable): A stream of documents.
docs (Iterable[Union[Doc, Span]]): A stream of documents or spans.
batch_size (int): Number of documents to accumulate into a working set.
return_matches (bool): Yield the match lists along with the docs, making
results (doc, matches) tuples.

View File

@ -75,8 +75,8 @@ class Morphologizer(Tagger):
model (thinc.api.Model): The Thinc Model powering the pipeline component.
name (str): The component instance name, used to add entries to the
losses during training.
labels_morph (dict): TODO:
labels_pos (dict): TODO:
labels_morph (dict): Mapping of morph + POS tags to morph labels.
labels_pos (dict): Mapping of morph + POS tags to POS tags.
DOCS: https://spacy.io/api/morphologizer#init
"""

View File

@ -601,9 +601,7 @@ $ python -m spacy train [config_path] [--output] [--code] [--verbose] [overrides
## Pretrain {#pretrain new="2.1" tag="experimental"}
<!-- TODO: document new pretrain command and link to new pretraining docs -->
Pre-train the "token to vector" ([`Tok2vec`](/api/tok2vec)) layer of pipeline
Pretrain the "token to vector" ([`Tok2vec`](/api/tok2vec)) layer of pipeline
components on [raw text](/api/data-formats#pretrain), using an approximate
language-modeling objective. Specifically, we load pretrained vectors, and train
a component like a CNN, BiLSTM, etc. to predict vectors which match the
@ -611,7 +609,8 @@ pretrained ones. The weights are saved to a directory after each epoch. You can
then include a **path to one of these pretrained weights files** in your
[training config](/usage/training#config) as the `init_tok2vec` setting when you
train your model. This technique may be especially helpful if you have little
labelled data.
labelled data. See the usage docs on [pretraining](/usage/training#pretraining)
for more info.
<Infobox title="Changed in v3.0" variant="warning">
@ -634,8 +633,8 @@ $ python -m spacy pretrain [texts_loc] [output_dir] [config_path]
| `output_dir` | positional | Directory to write models to on each epoch. |
| `config_path` | positional | Path to [training config](/api/data-formats#config) file containing all settings and hyperparameters. |
| `--code`, `-c` | option | Path to Python file with additional code to be imported. Allows [registering custom functions](/usage/training#custom-models) for new architectures. |
| `--resume-path`, `-r` | option | TODO: |
| `--epoch-resume`, `-er` | option | TODO: |
| `--resume-path`, `-r` | option | Path to pretrained weights from which to resume pretraining. |
| `--epoch-resume`, `-er` | option | The epoch to resume counting from when using `--resume-path`. Prevents unintended overwriting of existing weight files. |
| `--help`, `-h` | flag | Show help message and available arguments. |
| overrides | | Config parameters to override. Should be options starting with `--` that correspond to the config section and value to override, e.g. `--training.use_gpu 1`. |
| **CREATES** | weights | The pretrained weights that can be used to initialize `spacy train`. |

View File

@ -20,9 +20,9 @@ Config files define the training process and model pipeline and can be passed to
[`spacy train`](/api/cli#train). They use
[Thinc's configuration system](https://thinc.ai/docs/usage-config) under the
hood. For details on how to use training configs, see the
[usage documentation](/usage/training#config).
<!-- TODO: add details on getting started and init config -->
[usage documentation](/usage/training#config). To get started with a blank
config or fill a partial config with all defaults, you can use the
[`init config`](/api/cli#init-config) command.
> #### What does the @ mean?
>
@ -52,8 +52,6 @@ your config and check that it's valid, you can run the
</Infobox>
<!-- TODO: once we know how we want to implement "starter config" workflow or outputting a full default config for the user, update this section with the command -->
### nlp {#config-nlp tag="section"}
> #### Example
@ -154,8 +152,6 @@ This section is optional and defines settings and controls for
[language model pretraining](/usage/training#pretraining). It's used when you
run [`spacy pretrain`](/api/cli#pretrain).
<!-- TODO: complete -->
| Name | Type | Description | Default |
| ---------------------------- | --------------------------------------------------- | ----------------------------------------------------------------------------- | --------------------------------------------------- |
| `max_epochs` | int | Maximum number of epochs. | `1000` |

View File

@ -5,4 +5,194 @@ tag: class
source: spacy/matcher/dependencymatcher.pyx
---
TODO: write
The `DependencyMatcher` follows the same API as the [`Matcher`](/api/matcher)
and [`PhraseMatcher`](/api/phrasematcher) and lets you match on dependency trees
using the
[Semgrex syntax](https://nlp.stanford.edu/nlp/javadoc/javanlp/edu/stanford/nlp/semgraph/semgrex/SemgrexPattern.html).
It requires a pretrained [`DependencyParser`](/api/parser) or other component
that sets the `Token.dep` attribute.
## Pattern format {#patterns}
> ```json
> ### Example
> [
> {
> "SPEC": {"NODE_NAME": "founded"},
> "PATTERN": {"ORTH": "founded"}
> },
> {
> "SPEC": {
> "NODE_NAME": "founder",
> "NBOR_RELOP": ">",
> "NBOR_NAME": "founded"
> },
> "PATTERN": {"DEP": "nsubj"}
> },
> {
> "SPEC": {
> "NODE_NAME": "object",
> "NBOR_RELOP": ">",
> "NBOR_NAME": "founded"
> },
> "PATTERN": {"DEP": "dobj"}
> }
> ]
> ```
A pattern added to the `DependencyMatcher` consists of a list of dictionaries,
with each dictionary describing a node to match. Each dictionary should have the
following top-level keys:
| Name | Type | Description |
| --------- | ---- | --------------------------------------------------------------------------------------------------------------------------- |
| `PATTERN` | dict | The token attributes to match in the same format as patterns provided to the regular token-based [`Matcher`](/api/matcher). |
| `SPEC` | dict | The relationships of the nodes in the subtree that should be matched. |
The `SPEC` includes the following fields:
| Name | Type | Description |
| ------------ | ---- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `NODE_NAME` | str | A unique name for this node to refer to it in other specs. |
| `NBOR_RELOP` | str | A [Semgrex](https://nlp.stanford.edu/nlp/javadoc/javanlp/edu/stanford/nlp/semgraph/semgrex/SemgrexPattern.html) operator that describes how the two nodes are related. |
| `NBOR_NAME` | str | The unique name of the node that this node is connected to. |
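Putting the two keys together, here is a minimal sketch of the pattern above expressed in Python (assuming the `en_core_web_sm` pipeline is installed; the `"FOUNDED"` rule name is illustrative, not part of the API):

```python
import spacy
from spacy.matcher import DependencyMatcher

nlp = spacy.load("en_core_web_sm")  # assumption: this trained pipeline is installed
matcher = DependencyMatcher(nlp.vocab)
pattern = [
    # Anchor node: the token "founded"
    {"SPEC": {"NODE_NAME": "founded"}, "PATTERN": {"ORTH": "founded"}},
    # Subject attached to "founded"
    {"SPEC": {"NODE_NAME": "founder", "NBOR_RELOP": ">", "NBOR_NAME": "founded"},
     "PATTERN": {"DEP": "nsubj"}},
    # Direct object attached to "founded"
    {"SPEC": {"NODE_NAME": "object", "NBOR_RELOP": ">", "NBOR_NAME": "founded"},
     "PATTERN": {"DEP": "dobj"}},
]
matcher.add("FOUNDED", [pattern])
doc = nlp("Bill Gates founded Microsoft.")
matches = matcher(doc)
```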
## DependencyMatcher.\_\_init\_\_ {#init tag="method"}
Create a rule-based `DependencyMatcher`.
> #### Example
>
> ```python
> from spacy.matcher import DependencyMatcher
> matcher = DependencyMatcher(nlp.vocab)
> ```
| Name | Type | Description |
| ------- | ------- | ------------------------------------------------------------------------------------------- |
| `vocab` | `Vocab` | The vocabulary object, which must be shared with the documents the matcher will operate on. |
## DependencyMatcher.\_\_call\_\_ {#call tag="method"}
Find all token sequences matching the supplied patterns on the `Doc` or `Span`.
> #### Example
>
> ```python
> from spacy.matcher import DependencyMatcher
>
> matcher = DependencyMatcher(nlp.vocab)
> pattern = [
> {"SPEC": {"NODE_NAME": "founded"}, "PATTERN": {"ORTH": "founded"}},
> {"SPEC": {"NODE_NAME": "founder", "NBOR_RELOP": ">", "NBOR_NAME": "founded"}, "PATTERN": {"DEP": "nsubj"}},
> ]
> matcher.add("Founder", [pattern])
> doc = nlp("Bill Gates founded Microsoft.")
> matches = matcher(doc)
> ```
| Name | Type | Description |
| ----------- | ------------ | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
| `doclike` | `Doc`/`Span` | The `Doc` or `Span` to match over. |
| **RETURNS** | list | A list of `(match_id, start, end)` tuples, describing the matches. A match tuple describes a span `doc[start:end]`. The `match_id` is the ID of the added match pattern. |
## DependencyMatcher.\_\_len\_\_ {#len tag="method"}
Get the number of rules (edges) added to the dependency matcher. Note that this
only returns the number of rules (identical with the number of IDs), not the
number of individual patterns.
> #### Example
>
> ```python
> matcher = DependencyMatcher(nlp.vocab)
> assert len(matcher) == 0
> pattern = [
> {"SPEC": {"NODE_NAME": "founded"}, "PATTERN": {"ORTH": "founded"}},
> {"SPEC": {"NODE_NAME": "START_ENTITY", "NBOR_RELOP": ">", "NBOR_NAME": "founded"}, "PATTERN": {"DEP": "nsubj"}},
> ]
> matcher.add("Rule", [pattern])
> assert len(matcher) == 1
> ```
| Name | Type | Description |
| ----------- | ---- | -------------------- |
| **RETURNS** | int | The number of rules. |
## DependencyMatcher.\_\_contains\_\_ {#contains tag="method"}
Check whether the matcher contains rules for a match ID.
> #### Example
>
> ```python
> matcher = DependencyMatcher(nlp.vocab)
> assert "Rule" not in matcher
> matcher.add("Rule", [pattern])
> assert "Rule" in matcher
> ```
| Name | Type | Description |
| ----------- | ---- | ----------------------------------------------------- |
| `key` | str | The match ID. |
| **RETURNS** | bool | Whether the matcher contains rules for this match ID. |
## DependencyMatcher.add {#add tag="method"}
Add a rule to the matcher, consisting of an ID key, one or more patterns, and an
optional callback function to act on the matches. The callback function will
receive the arguments `matcher`, `doc`, `i` and `matches`. If a pattern already
exists for the given ID, the patterns will be extended. An `on_match` callback
will be overwritten.
> #### Example
>
> ```python
> def on_match(matcher, doc, i, matches):
> print('Matched!', matches)
>
> matcher = DependencyMatcher(nlp.vocab)
> matcher.add("TEST_PATTERNS", patterns)
> ```
| Name | Type | Description |
| -------------- | ------------------ | --------------------------------------------------------------------------------------------- |
| `match_id` | str | An ID for the thing you're matching. |
| `patterns` | list | Match pattern. A pattern consists of a list of dicts, where each dict describes a node to match. |
| _keyword-only_ | | |
| `on_match` | callable or `None` | Callback function to act on matches. Takes the arguments `matcher`, `doc`, `i` and `matches`. |
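As a sketch of how the callback fits in (the `"FOUNDED"` key and the single-node pattern are illustrative, and `nlp` is assumed to be a loaded pipeline with a dependency parser):

```python
from spacy.matcher import DependencyMatcher

def on_match(matcher, doc, i, matches):
    # Called for each match; `i` is the index of the current match in `matches`
    print("Matched!", matches[i])

matcher = DependencyMatcher(nlp.vocab)
pattern = [{"SPEC": {"NODE_NAME": "anchor"}, "PATTERN": {"ORTH": "founded"}}]
matcher.add("FOUNDED", [pattern], on_match=on_match)
```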
## DependencyMatcher.remove {#remove tag="method"}
Remove a rule from the matcher. A `KeyError` is raised if the match ID does not
exist.
> #### Example
>
> ```python
> matcher.add("Rule", [pattern]])
> assert "Rule" in matcher
> matcher.remove("Rule")
> assert "Rule" not in matcher
> ```
| Name | Type | Description |
| ----- | ---- | ------------------------- |
| `key` | str | The ID of the match rule. |
## DependencyMatcher.get {#get tag="method"}
Retrieve the pattern stored for a key. Returns the rule as an
`(on_match, patterns)` tuple containing the callback and available patterns.
> #### Example
>
> ```python
> matcher.add("Rule", [pattern], on_match=on_match)
> on_match, patterns = matcher.get("Rule")
> ```
| Name | Type | Description |
| ----------- | ----- | --------------------------------------------- |
| `key` | str | The ID of the match rule. |
| **RETURNS** | tuple | The rule, as an `(on_match, patterns)` tuple. |

View File

@ -5,6 +5,82 @@ tag: class
source: spacy/matcher/matcher.pyx
---
The `Matcher` lets you find words and phrases using rules describing their token
attributes. Rules can refer to token annotations (like the text or
part-of-speech tags), as well as lexical attributes like `Token.is_punct`.
Applying the matcher to a [`Doc`](/api/doc) gives you access to the matched
tokens in context. For in-depth examples and workflows for combining rules and
statistical models, see the [usage guide](/usage/rule-based-matching) on
rule-based matching.
## Pattern format {#patterns}
> ```json
> ### Example
> [
> {"LOWER": "i"},
> {"LEMMA": {"IN": ["like", "love"]}},
> {"POS": "NOUN", "OP": "+"}
> ]
> ```
A pattern added to the `Matcher` consists of a list of dictionaries. Each
dictionary describes **one token** and its attributes. The available token
pattern keys correspond to a number of
[`Token` attributes](/api/token#attributes). The supported attributes for
rule-based matching are:
| Attribute | Type |  Description |
| -------------------------------------- | ---- | ------------------------------------------------------------------------------------------------------ |
| `ORTH` | str | The exact verbatim text of a token. |
| `TEXT` <Tag variant="new">2.1</Tag> | str | The exact verbatim text of a token. |
| `LOWER` | str | The lowercase form of the token text. |
|  `LENGTH` | int | The length of the token text. |
|  `IS_ALPHA`, `IS_ASCII`, `IS_DIGIT` | bool | Token text consists of alphabetic characters, ASCII characters, digits. |
|  `IS_LOWER`, `IS_UPPER`, `IS_TITLE` | bool | Token text is in lowercase, uppercase, titlecase. |
|  `IS_PUNCT`, `IS_SPACE`, `IS_STOP` | bool | Token is punctuation, whitespace, stop word. |
|  `LIKE_NUM`, `LIKE_URL`, `LIKE_EMAIL` | bool | Token text resembles a number, URL, email. |
|  `POS`, `TAG`, `DEP`, `LEMMA`, `SHAPE` | str | The token's simple and extended part-of-speech tag, dependency label, lemma, shape. |
| `ENT_TYPE` | str | The token's entity label. |
| `_` <Tag variant="new">2.1</Tag> | dict | Properties in [custom extension attributes](/usage/processing-pipelines#custom-components-attributes). |
| `OP` | str | Operator or quantifier to determine how often to match a token pattern. |
Operators and quantifiers define **how often** a token pattern should be
matched:
> ```json
> ### Example
> [
> {"POS": "ADJ", "OP": "*"},
> {"POS": "NOUN", "OP": "+"}
> ]
> ```
| OP | Description |
| --- | ---------------------------------------------------------------- |
| `!` | Negate the pattern, by requiring it to match exactly 0 times. |
| `?` | Make the pattern optional, by allowing it to match 0 or 1 times. |
| `+` | Require the pattern to match 1 or more times. |
| `*` | Allow the pattern to match zero or more times. |
Token patterns can also map to a **dictionary of properties** instead of a
single value to indicate whether the expected value is a member of a list or how
it compares to another value.
> ```json
> ### Example
> [
> {"LEMMA": {"IN": ["like", "love", "enjoy"]}},
> {"POS": "PROPN", "LENGTH": {">=": 10}},
> ]
> ```
| Attribute | Type | Description |
| -------------------------- | ---------- | --------------------------------------------------------------------------------- |
| `IN` | any | Attribute value is member of a list. |
| `NOT_IN` | any | Attribute value is _not_ member of a list. |
| `==`, `>=`, `<=`, `>`, `<` | int, float | Attribute value is equal to, greater than or equal to, less than or equal to, greater than, or less than the given value. |
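For instance, a hedged sketch combining `IN` and a length comparison (assuming the `en_core_web_sm` pipeline is installed; the `"FEELINGS"` key is made up for illustration):

```python
import spacy
from spacy.matcher import Matcher

nlp = spacy.load("en_core_web_sm")  # assumption: this trained pipeline is installed
matcher = Matcher(nlp.vocab)
pattern = [
    {"LEMMA": {"IN": ["like", "love", "enjoy"]}},  # token whose lemma is in the list
    {"POS": "PROPN", "LENGTH": {">=": 10}},        # followed by a long proper noun
]
matcher.add("FEELINGS", [pattern])
doc = nlp("We all love Christopher, and everyone enjoys Strasbourg.")
for match_id, start, end in matcher(doc):
    print(nlp.vocab.strings[match_id], doc[start:end].text)
```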
## Matcher.\_\_init\_\_ {#init tag="method"}
Create the rule-based `Matcher`. If `validate=True` is set, all patterns added
@ -60,7 +136,7 @@ Match a stream of documents, yielding them in turn.
| Name | Type | Description |
| --------------------------------------------- | -------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| `docs` | iterable | A stream of documents. |
| `docs` | iterable | A stream of documents or spans. |
| `batch_size` | int | The number of documents to accumulate into a working set. |
| `return_matches` <Tag variant="new">2.1</Tag> | bool | Yield the match lists along with the docs, making results `(doc, matches)` tuples. |
| `as_tuples` | bool | Interpret the input stream as `(doc, context)` tuples, and yield `(result, context)` tuples out. If both `return_matches` and `as_tuples` are `True`, the output will be a sequence of `((doc, matches), context)` tuples. |
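A minimal sketch of streaming matches over several texts (reusing the `nlp` and `matcher` objects assumed above; with `return_matches=True` each result is a `(doc, matches)` tuple):

```python
texts = ["We love Christopher.", "They enjoy Strasbourg."]
for doc, matches in matcher.pipe(nlp.pipe(texts), return_matches=True):
    # Each match is a (match_id, start, end) tuple over the current doc
    print(doc.text, [doc[start:end].text for _, start, end in matches])
```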
@ -105,11 +181,11 @@ Check whether the matcher contains rules for a match ID.
## Matcher.add {#add tag="method" new="2"}
Add a rule to the matcher, consisting of an ID key, one or more patterns, and a
callback function to act on the matches. The callback function will receive the
arguments `matcher`, `doc`, `i` and `matches`. If a pattern already exists for
the given ID, the patterns will be extended. An `on_match` callback will be
overwritten.
Add a rule to the matcher, consisting of an ID key, one or more patterns, and an
optional callback function to act on the matches. The callback function will
receive the arguments `matcher`, `doc`, `i` and `matches`. If a pattern already
exists for the given ID, the patterns will be extended. An `on_match` callback
will be overwritten.
> #### Example
>
@ -141,12 +217,13 @@ patterns = [[{"TEXT": "Google"}, {"TEXT": "Now"}], [{"TEXT": "GoogleNow"}]]
</Infobox>
| Name | Type | Description |
| -------------- | ------------------ | --------------------------------------------------------------------------------------------- |
| `match_id` | str | An ID for the thing you're matching. |
| `patterns` | list | Match pattern. A pattern consists of a list of dicts, where each dict describes a token. |
| _keyword-only_ | | |
| `on_match` | callable or `None` | Callback function to act on matches. Takes the arguments `matcher`, `doc`, `i` and `matches`. |
| Name | Type | Description |
| ----------------------------------- | ------------------ | --------------------------------------------------------------------------------------------- |
| `match_id` | str | An ID for the thing you're matching. |
| `patterns` | `List[List[dict]]` | Match pattern. A pattern consists of a list of dicts, where each dict describes a token. |
| _keyword-only_ | | |
| `on_match` | callable / `None` | Callback function to act on matches. Takes the arguments `matcher`, `doc`, `i` and `matches`. |
| `greedy` <Tag variant="new">3</Tag> | str | Optional filter for greedy matches. Can either be `"FIRST"` or `"LONGEST"`. |
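A short sketch of the new `greedy` filter (the `"NOUNS"` key is illustrative, and `nlp` is assumed to be a loaded pipeline with a tagger): with `greedy="LONGEST"`, overlapping matches produced by the `+` operator are filtered down to the longest spans.

```python
from spacy.matcher import Matcher

matcher = Matcher(nlp.vocab)
# Match one or more consecutive nouns, keeping only the longest overlapping span
matcher.add("NOUNS", [[{"POS": "NOUN", "OP": "+"}]], greedy="LONGEST")
doc = nlp("The fox jumped over the garden fence post.")
print([doc[start:end].text for _, start, end in matcher(doc)])
```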
## Matcher.remove {#remove tag="method" new="2"}

View File

@ -63,16 +63,14 @@ Create a new pipeline instance. In your application, you would normally use a
shortcut for this and instantiate the component using its string name and
[`nlp.add_pipe`](/api/language#add_pipe).
<!-- TODO: finish API docs -->
| Name | Type | Description |
| -------------- | ------- | ------------------------------------------------------------------------------------------- |
| `vocab` | `Vocab` | The shared vocabulary. |
| `model` | `Model` | The [`Model`](https://thinc.ai/docs/api-model) powering the pipeline component. |
| `name` | str | String name of the component instance. Used to add entries to the `losses` during training. |
| _keyword-only_ | | |
| `labels_morph` | dict | |
| `labels_pos` | dict | |
| `labels_morph` | dict | Mapping of morph + POS tags to morph labels. |
| `labels_pos` | dict | Mapping of morph + POS tags to POS tags. |
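A minimal sketch of the usual shortcut (assuming the spaCy v3 `nlp.add_pipe` API with the built-in `"morphologizer"` factory):

```python
import spacy

nlp = spacy.blank("en")
# Adds the component with its default config and returns the Morphologizer instance
morphologizer = nlp.add_pipe("morphologizer")
```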
## Morphologizer.\_\_call\_\_ {#call tag="method"}

View File

@ -9,7 +9,8 @@ new: 2
The `PhraseMatcher` lets you efficiently match large terminology lists. While
the [`Matcher`](/api/matcher) lets you match sequences based on lists of token
descriptions, the `PhraseMatcher` accepts match patterns in the form of `Doc`
objects.
objects. See the [usage guide](/usage/rule-based-matching#phrasematcher) for
examples.
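For example, a minimal sketch (assuming a loaded pipeline `nlp`; the `"NAMES"` key is illustrative):

```python
from spacy.matcher import PhraseMatcher

matcher = PhraseMatcher(nlp.vocab)
terms = ["Barack Obama", "Angela Merkel"]
# Patterns are Doc objects; make_doc avoids running the full pipeline
patterns = [nlp.make_doc(term) for term in terms]
matcher.add("NAMES", patterns)
doc = nlp("Barack Obama met Angela Merkel in Berlin.")
matches = matcher(doc)
```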
## PhraseMatcher.\_\_init\_\_ {#init tag="method"}