mirror of
https://github.com/explosion/spaCy.git
synced 2025-08-05 12:50:20 +03:00
Rename EntityRuler and SpanRuler setting to matcher_fuzzy_compare
To keep the naming similar to `phrase_matcher_attr`, rename the `fuzzy_compare` setting for `EntityRuler` and `SpanRuler` to `matcher_fuzzy_compare`. Organize it next to `phrase_matcher_attr` in the docs.
This commit is contained in:
parent
96a786dc98
commit
903c4af880
|
@ -24,11 +24,11 @@ PatternType = Dict[str, Union[str, List[Dict[str, Any]]]]
|
||||||
assigns=["doc.ents", "token.ent_type", "token.ent_iob"],
|
assigns=["doc.ents", "token.ent_type", "token.ent_iob"],
|
||||||
default_config={
|
default_config={
|
||||||
"phrase_matcher_attr": None,
|
"phrase_matcher_attr": None,
|
||||||
|
"matcher_fuzzy_compare": {"@misc": "spacy.fuzzy_compare.v1"},
|
||||||
"validate": False,
|
"validate": False,
|
||||||
"overwrite_ents": False,
|
"overwrite_ents": False,
|
||||||
"ent_id_sep": DEFAULT_ENT_ID_SEP,
|
"ent_id_sep": DEFAULT_ENT_ID_SEP,
|
||||||
"scorer": {"@scorers": "spacy.entity_ruler_scorer.v1"},
|
"scorer": {"@scorers": "spacy.entity_ruler_scorer.v1"},
|
||||||
"fuzzy_compare": {"@misc": "spacy.fuzzy_compare.v1"},
|
|
||||||
},
|
},
|
||||||
default_score_weights={
|
default_score_weights={
|
||||||
"ents_f": 1.0,
|
"ents_f": 1.0,
|
||||||
|
@ -41,21 +41,21 @@ def make_entity_ruler(
|
||||||
nlp: Language,
|
nlp: Language,
|
||||||
name: str,
|
name: str,
|
||||||
phrase_matcher_attr: Optional[Union[int, str]],
|
phrase_matcher_attr: Optional[Union[int, str]],
|
||||||
|
matcher_fuzzy_compare: Callable,
|
||||||
validate: bool,
|
validate: bool,
|
||||||
overwrite_ents: bool,
|
overwrite_ents: bool,
|
||||||
ent_id_sep: str,
|
ent_id_sep: str,
|
||||||
scorer: Optional[Callable],
|
scorer: Optional[Callable],
|
||||||
fuzzy_compare: Callable,
|
|
||||||
):
|
):
|
||||||
return EntityRuler(
|
return EntityRuler(
|
||||||
nlp,
|
nlp,
|
||||||
name,
|
name,
|
||||||
phrase_matcher_attr=phrase_matcher_attr,
|
phrase_matcher_attr=phrase_matcher_attr,
|
||||||
|
matcher_fuzzy_compare=matcher_fuzzy_compare,
|
||||||
validate=validate,
|
validate=validate,
|
||||||
overwrite_ents=overwrite_ents,
|
overwrite_ents=overwrite_ents,
|
||||||
ent_id_sep=ent_id_sep,
|
ent_id_sep=ent_id_sep,
|
||||||
scorer=scorer,
|
scorer=scorer,
|
||||||
fuzzy_compare=fuzzy_compare,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -85,12 +85,12 @@ class EntityRuler(Pipe):
|
||||||
name: str = "entity_ruler",
|
name: str = "entity_ruler",
|
||||||
*,
|
*,
|
||||||
phrase_matcher_attr: Optional[Union[int, str]] = None,
|
phrase_matcher_attr: Optional[Union[int, str]] = None,
|
||||||
|
matcher_fuzzy_compare: Callable = fuzzy_compare,
|
||||||
validate: bool = False,
|
validate: bool = False,
|
||||||
overwrite_ents: bool = False,
|
overwrite_ents: bool = False,
|
||||||
ent_id_sep: str = DEFAULT_ENT_ID_SEP,
|
ent_id_sep: str = DEFAULT_ENT_ID_SEP,
|
||||||
patterns: Optional[List[PatternType]] = None,
|
patterns: Optional[List[PatternType]] = None,
|
||||||
scorer: Optional[Callable] = entity_ruler_score,
|
scorer: Optional[Callable] = entity_ruler_score,
|
||||||
fuzzy_compare: Callable = fuzzy_compare,
|
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Initialize the entity ruler. If patterns are supplied here, they
|
"""Initialize the entity ruler. If patterns are supplied here, they
|
||||||
need to be a list of dictionaries with a `"label"` and `"pattern"`
|
need to be a list of dictionaries with a `"label"` and `"pattern"`
|
||||||
|
@ -104,7 +104,9 @@ class EntityRuler(Pipe):
|
||||||
added. Used to disable the current entity ruler while creating
|
added. Used to disable the current entity ruler while creating
|
||||||
phrase patterns with the nlp object.
|
phrase patterns with the nlp object.
|
||||||
phrase_matcher_attr (int / str): Token attribute to match on, passed
|
phrase_matcher_attr (int / str): Token attribute to match on, passed
|
||||||
to the internal PhraseMatcher as `attr`
|
to the internal PhraseMatcher as `attr`.
|
||||||
|
matcher_fuzzy_compare (Callable): The fuzzy comparison method for the
|
||||||
|
internal Matcher. Defaults to spacy.matcher.matcher.fuzzy_compare.
|
||||||
validate (bool): Whether patterns should be validated, passed to
|
validate (bool): Whether patterns should be validated, passed to
|
||||||
Matcher and PhraseMatcher as `validate`
|
Matcher and PhraseMatcher as `validate`
|
||||||
patterns (iterable): Optional patterns to load in.
|
patterns (iterable): Optional patterns to load in.
|
||||||
|
@ -113,8 +115,6 @@ class EntityRuler(Pipe):
|
||||||
ent_id_sep (str): Separator used internally for entity IDs.
|
ent_id_sep (str): Separator used internally for entity IDs.
|
||||||
scorer (Optional[Callable]): The scoring method. Defaults to
|
scorer (Optional[Callable]): The scoring method. Defaults to
|
||||||
spacy.scorer.get_ner_prf.
|
spacy.scorer.get_ner_prf.
|
||||||
fuzzy_compare (Callable): The fuzzy comparison method for the internal
|
|
||||||
Matcher. Defaults to spacy.matcher.matcher.fuzzy_compare.
|
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/entityruler#init
|
DOCS: https://spacy.io/api/entityruler#init
|
||||||
"""
|
"""
|
||||||
|
@ -124,9 +124,9 @@ class EntityRuler(Pipe):
|
||||||
self.token_patterns = defaultdict(list) # type: ignore
|
self.token_patterns = defaultdict(list) # type: ignore
|
||||||
self.phrase_patterns = defaultdict(list) # type: ignore
|
self.phrase_patterns = defaultdict(list) # type: ignore
|
||||||
self._validate = validate
|
self._validate = validate
|
||||||
self._fuzzy_compare = fuzzy_compare
|
self.matcher_fuzzy_compare = matcher_fuzzy_compare
|
||||||
self.matcher = Matcher(
|
self.matcher = Matcher(
|
||||||
nlp.vocab, validate=validate, fuzzy_compare=fuzzy_compare
|
nlp.vocab, validate=validate, fuzzy_compare=self.matcher_fuzzy_compare
|
||||||
)
|
)
|
||||||
self.phrase_matcher_attr = phrase_matcher_attr
|
self.phrase_matcher_attr = phrase_matcher_attr
|
||||||
self.phrase_matcher = PhraseMatcher(
|
self.phrase_matcher = PhraseMatcher(
|
||||||
|
@ -349,7 +349,9 @@ class EntityRuler(Pipe):
|
||||||
self.phrase_patterns = defaultdict(list)
|
self.phrase_patterns = defaultdict(list)
|
||||||
self._ent_ids = defaultdict(tuple)
|
self._ent_ids = defaultdict(tuple)
|
||||||
self.matcher = Matcher(
|
self.matcher = Matcher(
|
||||||
self.nlp.vocab, validate=self._validate, fuzzy_compare=self._fuzzy_compare
|
self.nlp.vocab,
|
||||||
|
validate=self._validate,
|
||||||
|
fuzzy_compare=self.matcher_fuzzy_compare,
|
||||||
)
|
)
|
||||||
self.phrase_matcher = PhraseMatcher(
|
self.phrase_matcher = PhraseMatcher(
|
||||||
self.nlp.vocab, attr=self.phrase_matcher_attr, validate=self._validate
|
self.nlp.vocab, attr=self.phrase_matcher_attr, validate=self._validate
|
||||||
|
@ -444,7 +446,8 @@ class EntityRuler(Pipe):
|
||||||
self.overwrite = cfg.get("overwrite", False)
|
self.overwrite = cfg.get("overwrite", False)
|
||||||
self.phrase_matcher_attr = cfg.get("phrase_matcher_attr", None)
|
self.phrase_matcher_attr = cfg.get("phrase_matcher_attr", None)
|
||||||
self.phrase_matcher = PhraseMatcher(
|
self.phrase_matcher = PhraseMatcher(
|
||||||
self.nlp.vocab, attr=self.phrase_matcher_attr,
|
self.nlp.vocab,
|
||||||
|
attr=self.phrase_matcher_attr,
|
||||||
)
|
)
|
||||||
self.ent_id_sep = cfg.get("ent_id_sep", DEFAULT_ENT_ID_SEP)
|
self.ent_id_sep = cfg.get("ent_id_sep", DEFAULT_ENT_ID_SEP)
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -29,7 +29,7 @@ DEFAULT_SPANS_KEY = "ruler"
|
||||||
"overwrite_ents": False,
|
"overwrite_ents": False,
|
||||||
"scorer": {"@scorers": "spacy.entity_ruler_scorer.v1"},
|
"scorer": {"@scorers": "spacy.entity_ruler_scorer.v1"},
|
||||||
"ent_id_sep": "__unused__",
|
"ent_id_sep": "__unused__",
|
||||||
"fuzzy_compare": {"@misc": "spacy.fuzzy_compare.v1"},
|
"matcher_fuzzy_compare": {"@misc": "spacy.fuzzy_compare.v1"},
|
||||||
},
|
},
|
||||||
default_score_weights={
|
default_score_weights={
|
||||||
"ents_f": 1.0,
|
"ents_f": 1.0,
|
||||||
|
@ -42,11 +42,11 @@ def make_entity_ruler(
|
||||||
nlp: Language,
|
nlp: Language,
|
||||||
name: str,
|
name: str,
|
||||||
phrase_matcher_attr: Optional[Union[int, str]],
|
phrase_matcher_attr: Optional[Union[int, str]],
|
||||||
|
matcher_fuzzy_compare: Callable,
|
||||||
validate: bool,
|
validate: bool,
|
||||||
overwrite_ents: bool,
|
overwrite_ents: bool,
|
||||||
scorer: Optional[Callable],
|
scorer: Optional[Callable],
|
||||||
ent_id_sep: str,
|
ent_id_sep: str,
|
||||||
fuzzy_compare: Callable,
|
|
||||||
):
|
):
|
||||||
if overwrite_ents:
|
if overwrite_ents:
|
||||||
ents_filter = prioritize_new_ents_filter
|
ents_filter = prioritize_new_ents_filter
|
||||||
|
@ -60,10 +60,10 @@ def make_entity_ruler(
|
||||||
annotate_ents=True,
|
annotate_ents=True,
|
||||||
ents_filter=ents_filter,
|
ents_filter=ents_filter,
|
||||||
phrase_matcher_attr=phrase_matcher_attr,
|
phrase_matcher_attr=phrase_matcher_attr,
|
||||||
|
matcher_fuzzy_compare=matcher_fuzzy_compare,
|
||||||
validate=validate,
|
validate=validate,
|
||||||
overwrite=False,
|
overwrite=False,
|
||||||
scorer=scorer,
|
scorer=scorer,
|
||||||
fuzzy_compare=fuzzy_compare,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -76,13 +76,13 @@ def make_entity_ruler(
|
||||||
"annotate_ents": False,
|
"annotate_ents": False,
|
||||||
"ents_filter": {"@misc": "spacy.first_longest_spans_filter.v1"},
|
"ents_filter": {"@misc": "spacy.first_longest_spans_filter.v1"},
|
||||||
"phrase_matcher_attr": None,
|
"phrase_matcher_attr": None,
|
||||||
|
"matcher_fuzzy_compare": {"@misc": "spacy.fuzzy_compare.v1"},
|
||||||
"validate": False,
|
"validate": False,
|
||||||
"overwrite": True,
|
"overwrite": True,
|
||||||
"scorer": {
|
"scorer": {
|
||||||
"@scorers": "spacy.overlapping_labeled_spans_scorer.v1",
|
"@scorers": "spacy.overlapping_labeled_spans_scorer.v1",
|
||||||
"spans_key": DEFAULT_SPANS_KEY,
|
"spans_key": DEFAULT_SPANS_KEY,
|
||||||
},
|
},
|
||||||
"fuzzy_compare": {"@misc": "spacy.fuzzy_compare.v1"},
|
|
||||||
},
|
},
|
||||||
default_score_weights={
|
default_score_weights={
|
||||||
f"spans_{DEFAULT_SPANS_KEY}_f": 1.0,
|
f"spans_{DEFAULT_SPANS_KEY}_f": 1.0,
|
||||||
|
@ -99,10 +99,10 @@ def make_span_ruler(
|
||||||
annotate_ents: bool,
|
annotate_ents: bool,
|
||||||
ents_filter: Callable[[Iterable[Span], Iterable[Span]], Iterable[Span]],
|
ents_filter: Callable[[Iterable[Span], Iterable[Span]], Iterable[Span]],
|
||||||
phrase_matcher_attr: Optional[Union[int, str]],
|
phrase_matcher_attr: Optional[Union[int, str]],
|
||||||
|
matcher_fuzzy_compare: Callable,
|
||||||
validate: bool,
|
validate: bool,
|
||||||
overwrite: bool,
|
overwrite: bool,
|
||||||
scorer: Optional[Callable],
|
scorer: Optional[Callable],
|
||||||
fuzzy_compare: Callable,
|
|
||||||
):
|
):
|
||||||
return SpanRuler(
|
return SpanRuler(
|
||||||
nlp,
|
nlp,
|
||||||
|
@ -112,10 +112,10 @@ def make_span_ruler(
|
||||||
annotate_ents=annotate_ents,
|
annotate_ents=annotate_ents,
|
||||||
ents_filter=ents_filter,
|
ents_filter=ents_filter,
|
||||||
phrase_matcher_attr=phrase_matcher_attr,
|
phrase_matcher_attr=phrase_matcher_attr,
|
||||||
|
matcher_fuzzy_compare=matcher_fuzzy_compare,
|
||||||
validate=validate,
|
validate=validate,
|
||||||
overwrite=overwrite,
|
overwrite=overwrite,
|
||||||
scorer=scorer,
|
scorer=scorer,
|
||||||
fuzzy_compare=fuzzy_compare,
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@ -223,12 +223,12 @@ class SpanRuler(Pipe):
|
||||||
[Iterable[Span], Iterable[Span]], Iterable[Span]
|
[Iterable[Span], Iterable[Span]], Iterable[Span]
|
||||||
] = util.filter_chain_spans,
|
] = util.filter_chain_spans,
|
||||||
phrase_matcher_attr: Optional[Union[int, str]] = None,
|
phrase_matcher_attr: Optional[Union[int, str]] = None,
|
||||||
|
matcher_fuzzy_compare: Callable = fuzzy_compare,
|
||||||
validate: bool = False,
|
validate: bool = False,
|
||||||
overwrite: bool = False,
|
overwrite: bool = False,
|
||||||
scorer: Optional[Callable] = partial(
|
scorer: Optional[Callable] = partial(
|
||||||
overlapping_labeled_spans_score, spans_key=DEFAULT_SPANS_KEY
|
overlapping_labeled_spans_score, spans_key=DEFAULT_SPANS_KEY
|
||||||
),
|
),
|
||||||
fuzzy_compare: Callable = fuzzy_compare,
|
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Initialize the span ruler. If patterns are supplied here, they
|
"""Initialize the span ruler. If patterns are supplied here, they
|
||||||
need to be a list of dictionaries with a `"label"` and `"pattern"`
|
need to be a list of dictionaries with a `"label"` and `"pattern"`
|
||||||
|
@ -254,6 +254,8 @@ class SpanRuler(Pipe):
|
||||||
phrase_matcher_attr (Optional[Union[int, str]]): Token attribute to
|
phrase_matcher_attr (Optional[Union[int, str]]): Token attribute to
|
||||||
match on, passed to the internal PhraseMatcher as `attr`. Defaults
|
match on, passed to the internal PhraseMatcher as `attr`. Defaults
|
||||||
to `None`.
|
to `None`.
|
||||||
|
matcher_fuzzy_compare (Callable): The fuzzy comparison method for the
|
||||||
|
internal Matcher. Defaults to spacy.matcher.matcher.fuzzy_compare.
|
||||||
validate (bool): Whether patterns should be validated, passed to
|
validate (bool): Whether patterns should be validated, passed to
|
||||||
Matcher and PhraseMatcher as `validate`.
|
Matcher and PhraseMatcher as `validate`.
|
||||||
overwrite (bool): Whether to remove any existing spans under this spans
|
overwrite (bool): Whether to remove any existing spans under this spans
|
||||||
|
@ -261,8 +263,6 @@ class SpanRuler(Pipe):
|
||||||
`annotate_ents` is set. Defaults to `True`.
|
`annotate_ents` is set. Defaults to `True`.
|
||||||
scorer (Optional[Callable]): The scoring method. Defaults to
|
scorer (Optional[Callable]): The scoring method. Defaults to
|
||||||
spacy.pipeline.span_ruler.overlapping_labeled_spans_score.
|
spacy.pipeline.span_ruler.overlapping_labeled_spans_score.
|
||||||
fuzzy_compare (Callable): The fuzzy comparison method for the internal
|
|
||||||
Matcher. Defaults to spacy.matcher.matcher.fuzzy_compare.
|
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/spanruler#init
|
DOCS: https://spacy.io/api/spanruler#init
|
||||||
"""
|
"""
|
||||||
|
@ -276,7 +276,7 @@ class SpanRuler(Pipe):
|
||||||
self.spans_filter = spans_filter
|
self.spans_filter = spans_filter
|
||||||
self.ents_filter = ents_filter
|
self.ents_filter = ents_filter
|
||||||
self.scorer = scorer
|
self.scorer = scorer
|
||||||
self.fuzzy_compare = fuzzy_compare
|
self.matcher_fuzzy_compare = matcher_fuzzy_compare
|
||||||
self._match_label_id_map: Dict[int, Dict[str, str]] = {}
|
self._match_label_id_map: Dict[int, Dict[str, str]] = {}
|
||||||
self.clear()
|
self.clear()
|
||||||
|
|
||||||
|
@ -465,7 +465,7 @@ class SpanRuler(Pipe):
|
||||||
self.matcher: Matcher = Matcher(
|
self.matcher: Matcher = Matcher(
|
||||||
self.nlp.vocab,
|
self.nlp.vocab,
|
||||||
validate=self.validate,
|
validate=self.validate,
|
||||||
fuzzy_compare=self.fuzzy_compare,
|
fuzzy_compare=self.matcher_fuzzy_compare,
|
||||||
)
|
)
|
||||||
self.phrase_matcher: PhraseMatcher = PhraseMatcher(
|
self.phrase_matcher: PhraseMatcher = PhraseMatcher(
|
||||||
self.nlp.vocab,
|
self.nlp.vocab,
|
||||||
|
|
|
@ -411,7 +411,7 @@ def test_entity_ruler_fuzzy_disabled(nlp, entity_ruler_factory):
|
||||||
ruler = nlp.add_pipe(
|
ruler = nlp.add_pipe(
|
||||||
entity_ruler_factory,
|
entity_ruler_factory,
|
||||||
name="entity_ruler",
|
name="entity_ruler",
|
||||||
config={"fuzzy_compare": {"@misc": "test_fuzzy_compare_disabled"}},
|
config={"matcher_fuzzy_compare": {"@misc": "test_fuzzy_compare_disabled"}},
|
||||||
)
|
)
|
||||||
patterns = [{"label": "HELLO", "pattern": [{"LOWER": {"FUZZY": "hello"}}]}]
|
patterns = [{"label": "HELLO", "pattern": [{"LOWER": {"FUZZY": "hello"}}]}]
|
||||||
ruler.add_patterns(patterns)
|
ruler.add_patterns(patterns)
|
||||||
|
|
|
@ -55,14 +55,14 @@ how the component should be configured. You can override its settings via the
|
||||||
> nlp.add_pipe("entity_ruler", config=config)
|
> nlp.add_pipe("entity_ruler", config=config)
|
||||||
> ```
|
> ```
|
||||||
|
|
||||||
| Setting | Description |
|
| Setting | Description |
|
||||||
| ---------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
| ---------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
| `phrase_matcher_attr` | Optional attribute name match on for the internal [`PhraseMatcher`](/api/phrasematcher), e.g. `LOWER` to match on the lowercase token text. Defaults to `None`. ~~Optional[Union[int, str]]~~ |
|
| `phrase_matcher_attr` | Optional attribute name match on for the internal [`PhraseMatcher`](/api/phrasematcher), e.g. `LOWER` to match on the lowercase token text. Defaults to `None`. ~~Optional[Union[int, str]]~~ |
|
||||||
| `validate` | Whether patterns should be validated (passed to the `Matcher` and `PhraseMatcher`). Defaults to `False`. ~~bool~~ |
|
| `matcher_fuzzy_compare` <Tag variant="new">3.5</Tag> | The fuzzy comparison method, passed on to the internal `Matcher`. Defaults to `spacy.matcher.matcher.fuzzy_compare`. ~~Callable~~ |
|
||||||
| `overwrite_ents` | If existing entities are present, e.g. entities added by the model, overwrite them by matches if necessary. Defaults to `False`. ~~bool~~ |
|
| `validate` | Whether patterns should be validated (passed to the `Matcher` and `PhraseMatcher`). Defaults to `False`. ~~bool~~ |
|
||||||
| `ent_id_sep` | Separator used internally for entity IDs. Defaults to `"\|\|"`. ~~str~~ |
|
| `overwrite_ents` | If existing entities are present, e.g. entities added by the model, overwrite them by matches if necessary. Defaults to `False`. ~~bool~~ |
|
||||||
| `scorer` | The scoring method. Defaults to [`spacy.scorer.get_ner_prf`](/api/scorer#get_ner_prf). ~~Optional[Callable]~~ |
|
| `ent_id_sep` | Separator used internally for entity IDs. Defaults to `"\|\|"`. ~~str~~ |
|
||||||
| `fuzzy_compare` <Tag variant="new">3.5.0</Tag> | The fuzzy comparison method, passed on to the internal `Matcher`. ~~Callable~~ |
|
| `scorer` | The scoring method. Defaults to [`spacy.scorer.get_ner_prf`](/api/scorer#get_ner_prf). ~~Optional[Callable]~~ |
|
||||||
|
|
||||||
```python
|
```python
|
||||||
%%GITHUB_SPACY/spacy/pipeline/entityruler.py
|
%%GITHUB_SPACY/spacy/pipeline/entityruler.py
|
||||||
|
@ -86,18 +86,18 @@ be a token pattern (list) or a phrase pattern (string). For example:
|
||||||
> ruler = EntityRuler(nlp, overwrite_ents=True)
|
> ruler = EntityRuler(nlp, overwrite_ents=True)
|
||||||
> ```
|
> ```
|
||||||
|
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
| ---------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
| ---------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
| `nlp` | The shared nlp object to pass the vocab to the matchers and process phrase patterns. ~~Language~~ |
|
| `nlp` | The shared nlp object to pass the vocab to the matchers and process phrase patterns. ~~Language~~ |
|
||||||
| `name` <Tag variant="new">3</Tag> | Instance name of the current pipeline component. Typically passed in automatically from the factory when the component is added. Used to disable the current entity ruler while creating phrase patterns with the nlp object. ~~str~~ |
|
| `name` <Tag variant="new">3</Tag> | Instance name of the current pipeline component. Typically passed in automatically from the factory when the component is added. Used to disable the current entity ruler while creating phrase patterns with the nlp object. ~~str~~ |
|
||||||
| _keyword-only_ | |
|
| _keyword-only_ | |
|
||||||
| `phrase_matcher_attr` | Optional attribute name match on for the internal [`PhraseMatcher`](/api/phrasematcher), e.g. `LOWER` to match on the lowercase token text. Defaults to `None`. ~~Optional[Union[int, str]]~~ |
|
| `phrase_matcher_attr` | Optional attribute name match on for the internal [`PhraseMatcher`](/api/phrasematcher), e.g. `LOWER` to match on the lowercase token text. Defaults to `None`. ~~Optional[Union[int, str]]~~ |
|
||||||
| `validate` | Whether patterns should be validated, passed to Matcher and PhraseMatcher as `validate`. Defaults to `False`. ~~bool~~ |
|
| `matcher_fuzzy_compare` <Tag variant="new">3.5</Tag> | The fuzzy comparison method, passed on to the internal `Matcher`. Defaults to `spacy.matcher.matcher.fuzzy_compare`. ~~Callable~~ |
|
||||||
| `overwrite_ents` | If existing entities are present, e.g. entities added by the model, overwrite them by matches if necessary. Defaults to `False`. ~~bool~~ |
|
| `validate` | Whether patterns should be validated, passed to Matcher and PhraseMatcher as `validate`. Defaults to `False`. ~~bool~~ |
|
||||||
| `ent_id_sep` | Separator used internally for entity IDs. Defaults to `"\|\|"`. ~~str~~ |
|
| `overwrite_ents` | If existing entities are present, e.g. entities added by the model, overwrite them by matches if necessary. Defaults to `False`. ~~bool~~ |
|
||||||
| `patterns` | Optional patterns to load in on initialization. ~~Optional[List[Dict[str, Union[str, List[dict]]]]]~~ |
|
| `ent_id_sep` | Separator used internally for entity IDs. Defaults to `"\|\|"`. ~~str~~ |
|
||||||
| `scorer` | The scoring method. Defaults to [`spacy.scorer.get_ner_prf`](/api/scorer#get_ner_prf). ~~Optional[Callable]~~ |
|
| `patterns` | Optional patterns to load in on initialization. ~~Optional[List[Dict[str, Union[str, List[dict]]]]]~~ |
|
||||||
| `fuzzy_compare` <Tag variant="new">3.5.0</Tag> | The fuzzy comparison method, passed on to the internal `Matcher`. ~~Callable~~ |
|
| `scorer` | The scoring method. Defaults to [`spacy.scorer.get_ner_prf`](/api/scorer#get_ner_prf). ~~Optional[Callable]~~ |
|
||||||
|
|
||||||
## EntityRuler.initialize {#initialize tag="method" new="3"}
|
## EntityRuler.initialize {#initialize tag="method" new="3"}
|
||||||
|
|
||||||
|
|
|
@ -46,17 +46,17 @@ how the component should be configured. You can override its settings via the
|
||||||
> nlp.add_pipe("span_ruler", config=config)
|
> nlp.add_pipe("span_ruler", config=config)
|
||||||
> ```
|
> ```
|
||||||
|
|
||||||
| Setting | Description |
|
| Setting | Description |
|
||||||
| ---------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
| ---------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
| `spans_key` | The spans key to save the spans under. If `None`, no spans are saved. Defaults to `"ruler"`. ~~Optional[str]~~ |
|
| `spans_key` | The spans key to save the spans under. If `None`, no spans are saved. Defaults to `"ruler"`. ~~Optional[str]~~ |
|
||||||
| `spans_filter` | The optional method to filter spans before they are assigned to doc.spans. Defaults to `None`. ~~Optional[Callable[[Iterable[Span], Iterable[Span]], List[Span]]]~~ |
|
| `spans_filter` | The optional method to filter spans before they are assigned to doc.spans. Defaults to `None`. ~~Optional[Callable[[Iterable[Span], Iterable[Span]], List[Span]]]~~ |
|
||||||
| `annotate_ents` | Whether to save spans to doc.ents. Defaults to `False`. ~~bool~~ |
|
| `annotate_ents` | Whether to save spans to doc.ents. Defaults to `False`. ~~bool~~ |
|
||||||
| `ents_filter` | The method to filter spans before they are assigned to doc.ents. Defaults to `util.filter_chain_spans`. ~~Callable[[Iterable[Span], Iterable[Span]], List[Span]]~~ |
|
| `ents_filter` | The method to filter spans before they are assigned to doc.ents. Defaults to `util.filter_chain_spans`. ~~Callable[[Iterable[Span], Iterable[Span]], List[Span]]~~ |
|
||||||
| `phrase_matcher_attr` | Token attribute to match on, passed to the internal `PhraseMatcher` as `attr`. Defaults to `None`. ~~Optional[Union[int, str]]~~ |
|
| `phrase_matcher_attr` | Token attribute to match on, passed to the internal `PhraseMatcher` as `attr`. Defaults to `None`. ~~Optional[Union[int, str]]~~ |
|
||||||
| `validate` | Whether patterns should be validated, passed to `Matcher` and `PhraseMatcher` as `validate`. Defaults to `False`. ~~bool~~ |
|
| `matcher_fuzzy_compare` <Tag variant="new">3.5</Tag> | The fuzzy comparison method, passed on to the internal `Matcher`. Defaults to `spacy.matcher.matcher.fuzzy_compare`. ~~Callable~~ |
|
||||||
| `overwrite` | Whether to remove any existing spans under `Doc.spans[spans key]` if `spans_key` is set, or to remove any ents under `Doc.ents` if `annotate_ents` is set. Defaults to `True`. ~~bool~~ |
|
| `validate` | Whether patterns should be validated, passed to `Matcher` and `PhraseMatcher` as `validate`. Defaults to `False`. ~~bool~~ |
|
||||||
| `scorer` | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for `Doc.spans[spans_key]` with overlapping spans allowed. ~~Optional[Callable]~~ |
|
| `overwrite` | Whether to remove any existing spans under `Doc.spans[spans key]` if `spans_key` is set, or to remove any ents under `Doc.ents` if `annotate_ents` is set. Defaults to `True`. ~~bool~~ |
|
||||||
| `fuzzy_compare` <Tag variant="new">3.5.0</Tag> | The fuzzy comparison method, passed on to the internal `Matcher`. ~~Callable~~ |
|
| `scorer` | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for `Doc.spans[spans_key]` with overlapping spans allowed. ~~Optional[Callable]~~ |
|
||||||
|
|
||||||
```python
|
```python
|
||||||
%%GITHUB_SPACY/spacy/pipeline/span_ruler.py
|
%%GITHUB_SPACY/spacy/pipeline/span_ruler.py
|
||||||
|
@ -80,20 +80,20 @@ token pattern (list) or a phrase pattern (string). For example:
|
||||||
> ruler = SpanRuler(nlp, overwrite=True)
|
> ruler = SpanRuler(nlp, overwrite=True)
|
||||||
> ```
|
> ```
|
||||||
|
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
| ---------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
| ---------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
| `nlp` | The shared nlp object to pass the vocab to the matchers and process phrase patterns. ~~Language~~ |
|
| `nlp` | The shared nlp object to pass the vocab to the matchers and process phrase patterns. ~~Language~~ |
|
||||||
| `name` | Instance name of the current pipeline component. Typically passed in automatically from the factory when the component is added. Used to disable the current span ruler while creating phrase patterns with the nlp object. ~~str~~ |
|
| `name` | Instance name of the current pipeline component. Typically passed in automatically from the factory when the component is added. Used to disable the current span ruler while creating phrase patterns with the nlp object. ~~str~~ |
|
||||||
| _keyword-only_ | |
|
| _keyword-only_ | |
|
||||||
| `spans_key` | The spans key to save the spans under. If `None`, no spans are saved. Defaults to `"ruler"`. ~~Optional[str]~~ |
|
| `spans_key` | The spans key to save the spans under. If `None`, no spans are saved. Defaults to `"ruler"`. ~~Optional[str]~~ |
|
||||||
| `spans_filter` | The optional method to filter spans before they are assigned to doc.spans. Defaults to `None`. ~~Optional[Callable[[Iterable[Span], Iterable[Span]], List[Span]]]~~ |
|
| `spans_filter` | The optional method to filter spans before they are assigned to doc.spans. Defaults to `None`. ~~Optional[Callable[[Iterable[Span], Iterable[Span]], List[Span]]]~~ |
|
||||||
| `annotate_ents` | Whether to save spans to doc.ents. Defaults to `False`. ~~bool~~ |
|
| `annotate_ents` | Whether to save spans to doc.ents. Defaults to `False`. ~~bool~~ |
|
||||||
| `ents_filter` | The method to filter spans before they are assigned to doc.ents. Defaults to `util.filter_chain_spans`. ~~Callable[[Iterable[Span], Iterable[Span]], List[Span]]~~ |
|
| `ents_filter` | The method to filter spans before they are assigned to doc.ents. Defaults to `util.filter_chain_spans`. ~~Callable[[Iterable[Span], Iterable[Span]], List[Span]]~~ |
|
||||||
| `phrase_matcher_attr` | Token attribute to match on, passed to the internal PhraseMatcher as `attr`. Defaults to `None`. ~~Optional[Union[int, str]]~~ |
|
| `phrase_matcher_attr` | Token attribute to match on, passed to the internal PhraseMatcher as `attr`. Defaults to `None`. ~~Optional[Union[int, str]]~~ |
|
||||||
| `validate` | Whether patterns should be validated, passed to Matcher and PhraseMatcher as `validate`. Defaults to `False`. ~~bool~~ |
|
| `matcher_fuzzy_compare` <Tag variant="new">3.5</Tag> | The fuzzy comparison method, passed on to the internal `Matcher`. Defaults to `spacy.matcher.matcher.fuzzy_compare`. ~~Callable~~ |
|
||||||
| `overwrite` | Whether to remove any existing spans under `Doc.spans[spans key]` if `spans_key` is set, or to remove any ents under `Doc.ents` if `annotate_ents` is set. Defaults to `True`. ~~bool~~ |
|
| `validate` | Whether patterns should be validated, passed to Matcher and PhraseMatcher as `validate`. Defaults to `False`. ~~bool~~ |
|
||||||
| `scorer` | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for `Doc.spans[spans_key]` with overlapping spans allowed. ~~Optional[Callable]~~ |
|
| `overwrite` | Whether to remove any existing spans under `Doc.spans[spans key]` if `spans_key` is set, or to remove any ents under `Doc.ents` if `annotate_ents` is set. Defaults to `True`. ~~bool~~ |
|
||||||
| `fuzzy_compare` <Tag variant="new">3.5.0</Tag> | The fuzzy comparison method, passed on to the internal `Matcher`. ~~Callable~~ |
|
| `scorer` | The scoring method. Defaults to [`Scorer.score_spans`](/api/scorer#score_spans) for `Doc.spans[spans_key]` with overlapping spans allowed. ~~Optional[Callable]~~ |
|
||||||
|
|
||||||
## SpanRuler.initialize {#initialize tag="method"}
|
## SpanRuler.initialize {#initialize tag="method"}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user