Rename _default_fuzzy_compare to fuzzy_compare and remove it from the re-exported objects

This commit is contained in:
Adriane Boyd 2022-11-29 11:44:08 +01:00
parent d1628df277
commit 3c6dc10d60
6 changed files with 16 additions and 14 deletions

View File

@ -1,6 +1,6 @@
from .matcher import Matcher, _default_fuzzy_compare from .matcher import Matcher
from .phrasematcher import PhraseMatcher from .phrasematcher import PhraseMatcher
from .dependencymatcher import DependencyMatcher from .dependencymatcher import DependencyMatcher
from .levenshtein import levenshtein from .levenshtein import levenshtein
__all__ = ["Matcher", "PhraseMatcher", "DependencyMatcher", "levenshtein", "_default_fuzzy_compare"] __all__ = ["Matcher", "PhraseMatcher", "DependencyMatcher", "levenshtein"]

View File

@ -54,4 +54,4 @@ class Matcher:
) -> List[Span]: ... ) -> List[Span]: ...
def _normalize_key(self, key: Any) -> Any: ... def _normalize_key(self, key: Any) -> Any: ...
def _default_fuzzy_compare(s1: str, s2: str, fuzzy: int = -1) -> bool: ... def fuzzy_compare(s1: str, s2: str, fuzzy: int = -1) -> bool: ...

View File

@ -32,7 +32,7 @@ from .levenshtein import levenshtein
DEF PADDING = 5 DEF PADDING = 5
cpdef bint _default_fuzzy_compare(s1: str, s2: str, fuzzy: int = -1): cpdef bint fuzzy_compare(s1: str, s2: str, fuzzy: int = -1):
distance = min(len(s1), len(s2)) distance = min(len(s1), len(s2))
distance -= 1 # don't allow completely different tokens distance -= 1 # don't allow completely different tokens
if fuzzy == -1: # FUZZY operator with unspecified fuzzy if fuzzy == -1: # FUZZY operator with unspecified fuzzy
@ -44,7 +44,7 @@ cpdef bint _default_fuzzy_compare(s1: str, s2: str, fuzzy: int = -1):
@registry.misc("spacy.fuzzy_compare.v1") @registry.misc("spacy.fuzzy_compare.v1")
def make_fuzzy_compare(): def make_fuzzy_compare():
return _default_fuzzy_compare return fuzzy_compare
cdef class Matcher: cdef class Matcher:
@ -54,7 +54,7 @@ cdef class Matcher:
USAGE: https://spacy.io/usage/rule-based-matching USAGE: https://spacy.io/usage/rule-based-matching
""" """
def __init__(self, vocab, validate=True, *, fuzzy_compare=_default_fuzzy_compare): def __init__(self, vocab, validate=True, *, fuzzy_compare=fuzzy_compare):
"""Create the Matcher. """Create the Matcher.
vocab (Vocab): The vocabulary object, which must be shared with the vocab (Vocab): The vocabulary object, which must be shared with the

View File

@ -10,7 +10,8 @@ from ..language import Language
from ..errors import Errors, Warnings from ..errors import Errors, Warnings
from ..util import ensure_path, to_disk, from_disk, SimpleFrozenList, registry from ..util import ensure_path, to_disk, from_disk, SimpleFrozenList, registry
from ..tokens import Doc, Span from ..tokens import Doc, Span
from ..matcher import Matcher, PhraseMatcher, _default_fuzzy_compare from ..matcher import Matcher, PhraseMatcher
from ..matcher.matcher import fuzzy_compare
from ..scorer import get_ner_prf from ..scorer import get_ner_prf
@ -89,7 +90,7 @@ class EntityRuler(Pipe):
ent_id_sep: str = DEFAULT_ENT_ID_SEP, ent_id_sep: str = DEFAULT_ENT_ID_SEP,
patterns: Optional[List[PatternType]] = None, patterns: Optional[List[PatternType]] = None,
scorer: Optional[Callable] = entity_ruler_score, scorer: Optional[Callable] = entity_ruler_score,
fuzzy_compare: Callable = _default_fuzzy_compare, fuzzy_compare: Callable = fuzzy_compare,
) -> None: ) -> None:
"""Initialize the entity ruler. If patterns are supplied here, they """Initialize the entity ruler. If patterns are supplied here, they
need to be a list of dictionaries with a `"label"` and `"pattern"` need to be a list of dictionaries with a `"label"` and `"pattern"`

View File

@ -12,7 +12,8 @@ from ..errors import Errors, Warnings
from ..util import ensure_path, SimpleFrozenList, registry from ..util import ensure_path, SimpleFrozenList, registry
from ..tokens import Doc, Span from ..tokens import Doc, Span
from ..scorer import Scorer from ..scorer import Scorer
from ..matcher import Matcher, PhraseMatcher, _default_fuzzy_compare from ..matcher import Matcher, PhraseMatcher
from ..matcher.matcher import fuzzy_compare
from .. import util from .. import util
PatternType = Dict[str, Union[str, List[Dict[str, Any]]]] PatternType = Dict[str, Union[str, List[Dict[str, Any]]]]
@ -227,7 +228,7 @@ class SpanRuler(Pipe):
scorer: Optional[Callable] = partial( scorer: Optional[Callable] = partial(
overlapping_labeled_spans_score, spans_key=DEFAULT_SPANS_KEY overlapping_labeled_spans_score, spans_key=DEFAULT_SPANS_KEY
), ),
fuzzy_compare: Callable = _default_fuzzy_compare, fuzzy_compare: Callable = fuzzy_compare,
) -> None: ) -> None:
"""Initialize the span ruler. If patterns are supplied here, they """Initialize the span ruler. If patterns are supplied here, they
need to be a list of dictionaries with a `"label"` and `"pattern"` need to be a list of dictionaries with a `"label"` and `"pattern"`

View File

@ -1,6 +1,6 @@
import pytest import pytest
from spacy.matcher import levenshtein from spacy.matcher import levenshtein
from spacy.matcher.matcher import _default_fuzzy_compare from spacy.matcher.matcher import fuzzy_compare
# empty string plus 10 random ASCII, 10 random unicode, and 2 random long tests # empty string plus 10 random ASCII, 10 random unicode, and 2 random long tests
@ -69,6 +69,6 @@ def test_levenshtein(dist, a, b):
("abcdefgh", "cdefghijkl", -1, False), # default equivalent to 5 (max) ("abcdefgh", "cdefghijkl", -1, False), # default equivalent to 5 (max)
], ],
) )
def test_default_fuzzy_compare(a, b, fuzzy, expected): def test_fuzzy_compare(a, b, fuzzy, expected):
assert _default_fuzzy_compare(a, b, fuzzy) == expected assert fuzzy_compare(a, b, fuzzy) == expected
assert _default_fuzzy_compare(b, a, fuzzy) == expected assert fuzzy_compare(b, a, fuzzy) == expected