From eb65c43b5c23b1d6eadfe2b694c37e726d2f2d1e Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Mon, 19 Dec 2022 16:06:11 +0100 Subject: [PATCH] Update docs for fuzzy_compare in components --- spacy/pipeline/entityruler.py | 3 ++- spacy/pipeline/span_ruler.py | 3 ++- website/docs/api/entityruler.md | 31 ++++++++++++++++--------------- website/docs/api/spanruler.md | 21 +++++++++++---------- 4 files changed, 31 insertions(+), 27 deletions(-) diff --git a/spacy/pipeline/entityruler.py b/spacy/pipeline/entityruler.py index 06b02be05..7d54f110e 100644 --- a/spacy/pipeline/entityruler.py +++ b/spacy/pipeline/entityruler.py @@ -113,7 +113,8 @@ class EntityRuler(Pipe): ent_id_sep (str): Separator used internally for entity IDs. scorer (Optional[Callable]): The scoring method. Defaults to spacy.scorer.get_ner_prf. - fuzzy_compare (Callable): The fuzzy comparison method. + fuzzy_compare (Callable): The fuzzy comparison method for the internal + Matcher. Defaults to spacy.matcher.matcher.fuzzy_compare. DOCS: https://spacy.io/api/entityruler#init """ diff --git a/spacy/pipeline/span_ruler.py b/spacy/pipeline/span_ruler.py index d287e6b1c..94a22e255 100644 --- a/spacy/pipeline/span_ruler.py +++ b/spacy/pipeline/span_ruler.py @@ -261,7 +261,8 @@ class SpanRuler(Pipe): `annotate_ents` is set. Defaults to `True`. scorer (Optional[Callable]): The scoring method. Defaults to spacy.pipeline.span_ruler.overlapping_labeled_spans_score. - fuzzy_compare (Callable): The default fuzzy comparison method. + fuzzy_compare (Callable): The fuzzy comparison method for the internal + Matcher. Defaults to spacy.matcher.matcher.fuzzy_compare. DOCS: https://spacy.io/api/spanruler#init """ diff --git a/website/docs/api/entityruler.md b/website/docs/api/entityruler.md index c2ba33f01..7a12d597b 100644 --- a/website/docs/api/entityruler.md +++ b/website/docs/api/entityruler.md @@ -55,13 +55,14 @@ how the component should be configured. You can override its settings via the > nlp.add_pipe("entity_ruler", config=config) > ``` -| Setting | Description | -| --------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `phrase_matcher_attr` | Optional attribute name match on for the internal [`PhraseMatcher`](/api/phrasematcher), e.g. `LOWER` to match on the lowercase token text. Defaults to `None`. ~~Optional[Union[int, str]]~~ | -| `validate` | Whether patterns should be validated (passed to the `Matcher` and `PhraseMatcher`). Defaults to `False`. ~~bool~~ | -| `overwrite_ents` | If existing entities are present, e.g. entities added by the model, overwrite them by matches if necessary. Defaults to `False`. ~~bool~~ | -| `ent_id_sep` | Separator used internally for entity IDs. Defaults to `"\|\|"`. ~~str~~ | -| `scorer` | The scoring method. Defaults to [`spacy.scorer.get_ner_prf`](/api/scorer#get_ner_prf). ~~Optional[Callable]~~ | +| Setting | Description | +| ---------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `phrase_matcher_attr` | Optional attribute name match on for the internal [`PhraseMatcher`](/api/phrasematcher), e.g. `LOWER` to match on the lowercase token text. Defaults to `None`. ~~Optional[Union[int, str]]~~ | +| `validate` | Whether patterns should be validated (passed to the `Matcher` and `PhraseMatcher`). Defaults to `False`. ~~bool~~ | +| `overwrite_ents` | If existing entities are present, e.g. entities added by the model, overwrite them by matches if necessary. Defaults to `False`. ~~bool~~ | +| `ent_id_sep` | Separator used internally for entity IDs. Defaults to `"\|\|"`. ~~str~~ | +| `scorer` | The scoring method. Defaults to [`spacy.scorer.get_ner_prf`](/api/scorer#get_ner_prf). ~~Optional[Callable]~~ | +| `fuzzy_compare` 3.5.0 | The fuzzy comparison method, passed on to the internal `Matcher`. ~~Callable~~ | ```python %%GITHUB_SPACY/spacy/pipeline/entityruler.py @@ -99,9 +100,9 @@ be a token pattern (list) or a phrase pattern (string). For example: ## EntityRuler.initialize {#initialize tag="method" new="3"} Initialize the component with data and used before training to load in rules -from a [pattern file](/usage/rule-based-matching/#entityruler-files). This method -is typically called by [`Language.initialize`](/api/language#initialize) and -lets you customize arguments it receives via the +from a [pattern file](/usage/rule-based-matching/#entityruler-files). This +method is typically called by [`Language.initialize`](/api/language#initialize) +and lets you customize arguments it receives via the [`[initialize.components]`](/api/data-formats#config-initialize) block in the config. @@ -210,10 +211,10 @@ of dicts) or a phrase pattern (string). For more details, see the usage guide on | ---------- | ---------------------------------------------------------------- | | `patterns` | The patterns to add. ~~List[Dict[str, Union[str, List[dict]]]]~~ | - ## EntityRuler.remove {#remove tag="method" new="3.2.1"} -Remove a pattern by its ID from the entity ruler. A `ValueError` is raised if the ID does not exist. +Remove a pattern by its ID from the entity ruler. A `ValueError` is raised if +the ID does not exist. > #### Example > @@ -224,9 +225,9 @@ Remove a pattern by its ID from the entity ruler. A `ValueError` is raised if th > ruler.remove("apple") > ``` -| Name | Description | -| ---------- | ---------------------------------------------------------------- | -| `id` | The ID of the pattern rule. ~~str~~ | +| Name | Description | +| ---- | ----------------------------------- | +| `id` | The ID of the pattern rule. ~~str~~ | ## EntityRuler.to_disk {#to_disk tag="method"} diff --git a/website/docs/api/spanruler.md b/website/docs/api/spanruler.md index b573f7c58..cd6c389df 100644 --- a/website/docs/api/spanruler.md +++ b/website/docs/api/spanruler.md @@ -46,16 +46,17 @@ how the component should be configured. You can override its settings via the > nlp.add_pipe("span_ruler", config=config) > ``` -| Setting | Description | -| --------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `spans_key` | The spans key to save the spans under. If `None`, no spans are saved. Defaults to `"ruler"`. ~~Optional[str]~~ | -| `spans_filter` | The optional method to filter spans before they are assigned to doc.spans. Defaults to `None`. ~~Optional[Callable[[Iterable[Span], Iterable[Span]], List[Span]]]~~ | -| `annotate_ents` | Whether to save spans to doc.ents. Defaults to `False`. ~~bool~~ | -| `ents_filter` | The method to filter spans before they are assigned to doc.ents. Defaults to `util.filter_chain_spans`. ~~Callable[[Iterable[Span], Iterable[Span]], List[Span]]~~ | -| `phrase_matcher_attr` | Token attribute to match on, passed to the internal PhraseMatcher as `attr`. Defaults to `None`. ~~Optional[Union[int, str]]~~ | -| `validate` | Whether patterns should be validated, passed to Matcher and PhraseMatcher as `validate`. Defaults to `False`. ~~bool~~ | -| `overwrite` | Whether to remove any existing spans under `Doc.spans[spans key]` if `spans_key` is set, or to remove any ents under `Doc.ents` if `annotate_ents` is set. Defaults to `True`. ~~bool~~ | -| `scorer` | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for `Doc.spans[spans_key]` with overlapping spans allowed. ~~Optional[Callable]~~ | +| Setting | Description | +| ---------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `spans_key` | The spans key to save the spans under. If `None`, no spans are saved. Defaults to `"ruler"`. ~~Optional[str]~~ | +| `spans_filter` | The optional method to filter spans before they are assigned to doc.spans. Defaults to `None`. ~~Optional[Callable[[Iterable[Span], Iterable[Span]], List[Span]]]~~ | +| `annotate_ents` | Whether to save spans to doc.ents. Defaults to `False`. ~~bool~~ | +| `ents_filter` | The method to filter spans before they are assigned to doc.ents. Defaults to `util.filter_chain_spans`. ~~Callable[[Iterable[Span], Iterable[Span]], List[Span]]~~ | +| `phrase_matcher_attr` | Token attribute to match on, passed to the internal `PhraseMatcher` as `attr`. Defaults to `None`. ~~Optional[Union[int, str]]~~ | +| `validate` | Whether patterns should be validated, passed to `Matcher` and `PhraseMatcher` as `validate`. Defaults to `False`. ~~bool~~ | +| `overwrite` | Whether to remove any existing spans under `Doc.spans[spans key]` if `spans_key` is set, or to remove any ents under `Doc.ents` if `annotate_ents` is set. Defaults to `True`. ~~bool~~ | +| `scorer` | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for `Doc.spans[spans_key]` with overlapping spans allowed. ~~Optional[Callable]~~ | +| `fuzzy_compare` 3.5.0 | The fuzzy comparison method, passed on to the internal `Matcher`. ~~Callable~~ | ```python %%GITHUB_SPACY/spacy/pipeline/span_ruler.py