From 66e9fdd2463fa522a4d1d9f5792359152dc418dd Mon Sep 17 00:00:00 2001 From: Kevin Humphreys Date: Wed, 24 Aug 2022 13:13:27 +0200 Subject: [PATCH] add fuzzy param to EntityRuler --- spacy/pipeline/entityruler.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/spacy/pipeline/entityruler.py b/spacy/pipeline/entityruler.py index 3cb1ca676..d1b05334e 100644 --- a/spacy/pipeline/entityruler.py +++ b/spacy/pipeline/entityruler.py @@ -28,6 +28,7 @@ PatternType = Dict[str, Union[str, List[Dict[str, Any]]]] "overwrite_ents": False, "ent_id_sep": DEFAULT_ENT_ID_SEP, "scorer": {"@scorers": "spacy.entity_ruler_scorer.v1"}, + "fuzzy": None, }, default_score_weights={ "ents_f": 1.0, @@ -44,6 +45,7 @@ def make_entity_ruler( overwrite_ents: bool, ent_id_sep: str, scorer: Optional[Callable], + fuzzy: Optional[float], ): return EntityRuler( nlp, @@ -53,6 +55,7 @@ def make_entity_ruler( overwrite_ents=overwrite_ents, ent_id_sep=ent_id_sep, scorer=scorer, + fuzzy=fuzzy, ) @@ -87,6 +90,7 @@ class EntityRuler(Pipe): ent_id_sep: str = DEFAULT_ENT_ID_SEP, patterns: Optional[List[PatternType]] = None, scorer: Optional[Callable] = entity_ruler_score, + fuzzy: Optional[float] = None, ) -> None: """Initialize the entity ruler. 
If patterns are supplied here, they need to be a list of dictionaries with a `"label"` and `"pattern"` @@ -118,7 +122,7 @@ class EntityRuler(Pipe): self.token_patterns = defaultdict(list) # type: ignore self.phrase_patterns = defaultdict(list) # type: ignore self._validate = validate - self.matcher = Matcher(nlp.vocab, validate=validate) + self.matcher = Matcher(nlp.vocab, validate=validate, fuzzy=fuzzy) self.phrase_matcher_attr = phrase_matcher_attr self.phrase_matcher = PhraseMatcher( nlp.vocab, attr=self.phrase_matcher_attr, validate=validate @@ -128,6 +132,7 @@ class EntityRuler(Pipe): if patterns is not None: self.add_patterns(patterns) self.scorer = scorer + self.fuzzy = fuzzy def __len__(self) -> int: """The number of all patterns added to the entity ruler.""" @@ -338,7 +343,7 @@ class EntityRuler(Pipe): self.token_patterns = defaultdict(list) self.phrase_patterns = defaultdict(list) self._ent_ids = defaultdict(tuple) - self.matcher = Matcher(self.nlp.vocab, validate=self._validate) + self.matcher = Matcher(self.nlp.vocab, validate=self._validate, fuzzy=self.fuzzy) self.phrase_matcher = PhraseMatcher( self.nlp.vocab, attr=self.phrase_matcher_attr, validate=self._validate )