mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	* enable fuzzy matching * add fuzzy param to EntityMatcher * include rapidfuzz_capi not yet used * fix type * add FUZZY predicate * add fuzzy attribute list * fix type properly * tidying * remove unnecessary dependency * handle fuzzy sets * simplify fuzzy sets * case fix * switch to FUZZYn predicates use Levenshtein distance. remove fuzzy param. remove rapidfuzz_capi. * revert changes added for fuzzy param * switch to polyleven (Python package) * enable fuzzy matching * add fuzzy param to EntityMatcher * include rapidfuzz_capi not yet used * fix type * add FUZZY predicate * add fuzzy attribute list * fix type properly * tidying * remove unnecessary dependency * handle fuzzy sets * simplify fuzzy sets * case fix * switch to FUZZYn predicates use Levenshtein distance. remove fuzzy param. remove rapidfuzz_capi. * revert changes added for fuzzy param * switch to polyleven (Python package) * fuzzy match only on oov tokens * remove polyleven * exclude whitespace tokens * don't allow more edits than characters * fix min distance * reinstate FUZZY operator with length-based distance function * handle sets inside regex operator * remove is_oov check * attempt build fix no mypy failure locally * re-attempt build fix * don't overwrite fuzzy param value * move fuzzy_match to its own Python module to allow patching * move fuzzy_match back inside Matcher simplify logic and add tests * Format tests * Parametrize fuzzyn tests * Parametrize and merge fuzzy+set tests * Format * Move fuzzy_match to a standalone method * Change regex kwarg type to bool * Add types for fuzzy_match - Refactor variable names - Add test for symmetrical behavior * Parametrize fuzzyn+set tests * Minor refactoring for fuzz/fuzzy * Make fuzzy_match a Matcher kwarg * Update type for _default_fuzzy_match * don't overwrite function param * Rename to fuzzy_compare * Update fuzzy_compare default argument declarations * allow fuzzy_compare override from EntityRuler * define new Matcher keyword arg * fix type 
definition * Implement fuzzy_compare config option for EntityRuler and SpanRuler * Rename _default_fuzzy_compare to fuzzy_compare, remove from reexported objects * Use simpler fuzzy_compare algorithm * Update types * Increase minimum to 2 in fuzzy_compare to allow one transposition * Fix predicate keys and matching for SetPredicate with FUZZY and REGEX * Add FUZZY6..9 * Add initial docs * Increase default fuzzy to rounded 30% of pattern length * Update docs for fuzzy_compare in components * Update EntityRuler and SpanRuler API docs * Rename EntityRuler and SpanRuler setting to matcher_fuzzy_compare To having naming similar to `phrase_matcher_attr`, rename `fuzzy_compare` setting for `EntityRuler` and `SpanRuler` to `matcher_fuzzy_compare. Organize next to `phrase_matcher_attr` in docs. * Fix schema aliases Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com> * Fix typo Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com> * Add FUZZY6-9 operators and update tests * Parameterize test over greedy Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com> * Fix type for fuzzy_compare to remove Optional * Rename to spacy.levenshtein_compare.v1, move to spacy.matcher.levenshtein * Update docs following levenshtein_compare renaming Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com> Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
		
			
				
	
	
		
			56 lines
		
	
	
		
			1.8 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			56 lines
		
	
	
		
			1.8 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
from typing import Any, List, Dict, Tuple, Optional, Callable, Union
 | 
						|
from typing import Iterator, Iterable, overload
 | 
						|
from ..compat import Literal
 | 
						|
from ..vocab import Vocab
 | 
						|
from ..tokens import Doc, Span
 | 
						|
 | 
						|
class Matcher:
    # Type stub for the rule-based matcher: signatures only, every body is
    # elided with `...`.
    #
    # Rule keys are annotated as Union[str, int] everywhere a key is accepted,
    # so that `__contains__` and `remove` agree with `add`, `has_key` and
    # `get` (and with `_normalize_key`, which accepts any key).

    # `fuzzy_compare` is a callable taking (str, str, int) and returning bool.
    def __init__(
        self,
        vocab: Vocab,
        validate: bool = ...,
        fuzzy_compare: Callable[[str, str, int], bool] = ...,
    ) -> None: ...
    def __reduce__(self) -> Any: ...
    def __len__(self) -> int: ...
    def __contains__(self, key: Union[str, int]) -> bool: ...

    # `patterns` is a list of patterns; each pattern is a list of per-token
    # attribute dicts. `on_match` and `greedy` are keyword-only.
    def add(
        self,
        key: Union[str, int],
        patterns: List[List[Dict[str, Any]]],
        *,
        on_match: Optional[
            Callable[[Matcher, Doc, int, List[Tuple[Any, ...]]], Any]
        ] = ...,
        greedy: Optional[str] = ...,
    ) -> None: ...
    def remove(self, key: Union[str, int]) -> None: ...
    def has_key(self, key: Union[str, int]) -> bool: ...

    # Returns a 2-tuple: an optional callback and a list of patterns.
    def get(
        self,
        key: Union[str, int],
        default: Optional[Any] = ...,
    ) -> Tuple[Optional[Callable[[Any], Any]], List[List[Dict[Any, Any]]]]: ...

    # The element type of the returned iterator is one of three shapes;
    # presumably selected by `return_matches` / `as_tuples` -- confirm against
    # the implementation.
    def pipe(
        self,
        docs: Iterable[Tuple[Doc, Any]],
        batch_size: int = ...,
        return_matches: bool = ...,
        as_tuples: bool = ...,
    ) -> Union[
        Iterator[Tuple[Tuple[Doc, Any], Any]],
        Iterator[Tuple[Doc, Any]],
        Iterator[Doc],
    ]: ...

    # Overload 1: `as_spans` omitted or False -> list of (int, int, int) tuples.
    @overload
    def __call__(
        self,
        doclike: Union[Doc, Span],
        *,
        as_spans: Literal[False] = ...,
        allow_missing: bool = ...,
        with_alignments: bool = ...,
    ) -> List[Tuple[int, int, int]]: ...

    # Overload 2: `as_spans=True` (must be passed explicitly) -> list of Span.
    @overload
    def __call__(
        self,
        doclike: Union[Doc, Span],
        *,
        as_spans: Literal[True],
        allow_missing: bool = ...,
        with_alignments: bool = ...,
    ) -> List[Span]: ...
    def _normalize_key(self, key: Any) -> Any: ...
 |