mirror of
https://github.com/explosion/spaCy.git
synced 2025-08-05 21:00:19 +03:00
add fuzzy param to EntityRuler (forwarded to its Matcher)
This commit is contained in:
parent
1f2e57eca4
commit
b617382dc6
|
@ -28,6 +28,7 @@ PatternType = Dict[str, Union[str, List[Dict[str, Any]]]]
|
|||
"overwrite_ents": False,
|
||||
"ent_id_sep": DEFAULT_ENT_ID_SEP,
|
||||
"scorer": {"@scorers": "spacy.entity_ruler_scorer.v1"},
|
||||
"fuzzy": None,
|
||||
},
|
||||
default_score_weights={
|
||||
"ents_f": 1.0,
|
||||
|
@ -44,6 +45,7 @@ def make_entity_ruler(
|
|||
overwrite_ents: bool,
|
||||
ent_id_sep: str,
|
||||
scorer: Optional[Callable],
|
||||
fuzzy: Optional[float],
|
||||
):
|
||||
return EntityRuler(
|
||||
nlp,
|
||||
|
@ -53,6 +55,7 @@ def make_entity_ruler(
|
|||
overwrite_ents=overwrite_ents,
|
||||
ent_id_sep=ent_id_sep,
|
||||
scorer=scorer,
|
||||
fuzzy=fuzzy,
|
||||
)
|
||||
|
||||
|
||||
|
@ -87,6 +90,7 @@ class EntityRuler(Pipe):
|
|||
ent_id_sep: str = DEFAULT_ENT_ID_SEP,
|
||||
patterns: Optional[List[PatternType]] = None,
|
||||
scorer: Optional[Callable] = entity_ruler_score,
|
||||
fuzzy: Optional[float] = None,
|
||||
) -> None:
|
||||
"""Initialize the entity ruler. If patterns are supplied here, they
|
||||
need to be a list of dictionaries with a `"label"` and `"pattern"`
|
||||
|
@ -118,7 +122,7 @@ class EntityRuler(Pipe):
|
|||
self.token_patterns = defaultdict(list) # type: ignore
|
||||
self.phrase_patterns = defaultdict(list) # type: ignore
|
||||
self._validate = validate
|
||||
- self.matcher = Matcher(nlp.vocab, validate=validate)
|
||||
+ self.matcher = Matcher(nlp.vocab, validate=validate, fuzzy=fuzzy)
|
||||
self.phrase_matcher_attr = phrase_matcher_attr
|
||||
self.phrase_matcher = PhraseMatcher(
|
||||
nlp.vocab, attr=self.phrase_matcher_attr, validate=validate
|
||||
|
@ -128,6 +132,7 @@ class EntityRuler(Pipe):
|
|||
if patterns is not None:
|
||||
self.add_patterns(patterns)
|
||||
self.scorer = scorer
|
||||
self.fuzzy = fuzzy
|
||||
|
||||
def __len__(self) -> int:
|
||||
"""The number of all patterns added to the entity ruler."""
|
||||
|
@ -338,7 +343,7 @@ class EntityRuler(Pipe):
|
|||
self.token_patterns = defaultdict(list)
|
||||
self.phrase_patterns = defaultdict(list)
|
||||
self._ent_ids = defaultdict(tuple)
|
||||
- self.matcher = Matcher(self.nlp.vocab, validate=self._validate)
|
||||
+ self.matcher = Matcher(self.nlp.vocab, validate=self._validate, fuzzy=self.fuzzy)
|
||||
self.phrase_matcher = PhraseMatcher(
|
||||
self.nlp.vocab, attr=self.phrase_matcher_attr, validate=self._validate
|
||||
)
|
||||
|
|
Loading…
Reference in New Issue
Block a user