From 3c6dc10d60c7af3bb7c5ddd17cb0f31e2510414e Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Tue, 29 Nov 2022 11:44:08 +0100 Subject: [PATCH] Rename _default_fuzzy_compare to fuzzy_compare, remove from reexported objects --- spacy/matcher/__init__.py | 4 ++-- spacy/matcher/matcher.pyi | 2 +- spacy/matcher/matcher.pyx | 6 +++--- spacy/pipeline/entityruler.py | 5 +++-- spacy/pipeline/span_ruler.py | 5 +++-- spacy/tests/matcher/test_levenshtein.py | 8 ++++---- 6 files changed, 16 insertions(+), 14 deletions(-) diff --git a/spacy/matcher/__init__.py b/spacy/matcher/__init__.py index 2fb347da4..a4f164847 100644 --- a/spacy/matcher/__init__.py +++ b/spacy/matcher/__init__.py @@ -1,6 +1,6 @@ -from .matcher import Matcher, _default_fuzzy_compare +from .matcher import Matcher from .phrasematcher import PhraseMatcher from .dependencymatcher import DependencyMatcher from .levenshtein import levenshtein -__all__ = ["Matcher", "PhraseMatcher", "DependencyMatcher", "levenshtein", "_default_fuzzy_compare"] +__all__ = ["Matcher", "PhraseMatcher", "DependencyMatcher", "levenshtein"] diff --git a/spacy/matcher/matcher.pyi b/spacy/matcher/matcher.pyi index 9d1aba00c..8fafa3a8f 100644 --- a/spacy/matcher/matcher.pyi +++ b/spacy/matcher/matcher.pyi @@ -54,4 +54,4 @@ class Matcher: ) -> List[Span]: ... def _normalize_key(self, key: Any) -> Any: ... -def _default_fuzzy_compare(s1: str, s2: str, fuzzy: int = -1) -> bool: ... +def fuzzy_compare(s1: str, s2: str, fuzzy: int = -1) -> bool: ... diff --git a/spacy/matcher/matcher.pyx b/spacy/matcher/matcher.pyx index 27d19b8ea..7e04f4609 100644 --- a/spacy/matcher/matcher.pyx +++ b/spacy/matcher/matcher.pyx @@ -32,7 +32,7 @@ from .levenshtein import levenshtein DEF PADDING = 5 -cpdef bint _default_fuzzy_compare(s1: str, s2: str, fuzzy: int = -1): +cpdef bint fuzzy_compare(s1: str, s2: str, fuzzy: int = -1): distance = min(len(s1), len(s2)) distance -= 1 # don't allow completely different tokens if fuzzy == -1: # FUZZY operator with unspecified fuzzy @@ -44,7 +44,7 @@ cpdef bint _default_fuzzy_compare(s1: str, s2: str, fuzzy: int = -1): @registry.misc("spacy.fuzzy_compare.v1") def make_fuzzy_compare(): - return _default_fuzzy_compare + return fuzzy_compare cdef class Matcher: @@ -54,7 +54,7 @@ cdef class Matcher: USAGE: https://spacy.io/usage/rule-based-matching """ - def __init__(self, vocab, validate=True, *, fuzzy_compare=_default_fuzzy_compare): + def __init__(self, vocab, validate=True, *, fuzzy_compare=fuzzy_compare): """Create the Matcher. vocab (Vocab): The vocabulary object, which must be shared with the diff --git a/spacy/pipeline/entityruler.py b/spacy/pipeline/entityruler.py index 2adc1fed2..06b02be05 100644 --- a/spacy/pipeline/entityruler.py +++ b/spacy/pipeline/entityruler.py @@ -10,7 +10,8 @@ from ..language import Language from ..errors import Errors, Warnings from ..util import ensure_path, to_disk, from_disk, SimpleFrozenList, registry from ..tokens import Doc, Span -from ..matcher import Matcher, PhraseMatcher, _default_fuzzy_compare +from ..matcher import Matcher, PhraseMatcher +from ..matcher.matcher import fuzzy_compare from ..scorer import get_ner_prf @@ -89,7 +90,7 @@ class EntityRuler(Pipe): ent_id_sep: str = DEFAULT_ENT_ID_SEP, patterns: Optional[List[PatternType]] = None, scorer: Optional[Callable] = entity_ruler_score, - fuzzy_compare: Callable = _default_fuzzy_compare, + fuzzy_compare: Callable = fuzzy_compare, ) -> None: """Initialize the entity ruler. If patterns are supplied here, they need to be a list of dictionaries with a `"label"` and `"pattern"` diff --git a/spacy/pipeline/span_ruler.py b/spacy/pipeline/span_ruler.py index 385e335d2..d287e6b1c 100644 --- a/spacy/pipeline/span_ruler.py +++ b/spacy/pipeline/span_ruler.py @@ -12,7 +12,8 @@ from ..errors import Errors, Warnings from ..util import ensure_path, SimpleFrozenList, registry from ..tokens import Doc, Span from ..scorer import Scorer -from ..matcher import Matcher, PhraseMatcher, _default_fuzzy_compare +from ..matcher import Matcher, PhraseMatcher +from ..matcher.matcher import fuzzy_compare from .. import util PatternType = Dict[str, Union[str, List[Dict[str, Any]]]] @@ -227,7 +228,7 @@ class SpanRuler(Pipe): scorer: Optional[Callable] = partial( overlapping_labeled_spans_score, spans_key=DEFAULT_SPANS_KEY ), - fuzzy_compare: Callable = _default_fuzzy_compare, + fuzzy_compare: Callable = fuzzy_compare, ) -> None: """Initialize the span ruler. If patterns are supplied here, they need to be a list of dictionaries with a `"label"` and `"pattern"` diff --git a/spacy/tests/matcher/test_levenshtein.py b/spacy/tests/matcher/test_levenshtein.py index f84da75d7..75e2eda13 100644 --- a/spacy/tests/matcher/test_levenshtein.py +++ b/spacy/tests/matcher/test_levenshtein.py @@ -1,6 +1,6 @@ import pytest from spacy.matcher import levenshtein -from spacy.matcher.matcher import _default_fuzzy_compare +from spacy.matcher.matcher import fuzzy_compare # empty string plus 10 random ASCII, 10 random unicode, and 2 random long tests @@ -69,6 +69,6 @@ def test_levenshtein(dist, a, b): ("abcdefgh", "cdefghijkl", -1, False), # default equivalent to 5 (max) ], ) -def test_default_fuzzy_compare(a, b, fuzzy, expected): - assert _default_fuzzy_compare(a, b, fuzzy) == expected - assert _default_fuzzy_compare(b, a, fuzzy) == expected +def test_fuzzy_compare(a, b, fuzzy, expected): + assert fuzzy_compare(a, b, fuzzy) == expected + assert fuzzy_compare(b, a, fuzzy) == expected