From c03394810b6d46dc13e8c68ba962f75f45aeeb9c Mon Sep 17 00:00:00 2001 From: Kevin Humphreys Date: Fri, 26 Aug 2022 02:06:05 +0200 Subject: [PATCH] tidying --- pyproject.toml | 2 ++ setup.cfg | 1 + spacy/matcher/matcher.pyx | 7 +++---- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 317c5fdbe..37d041b6d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,5 +8,7 @@ requires = [ "thinc>=8.1.0,<8.2.0", "pathy", "numpy>=1.15.0", + "rapidfuzz>=2.4.0,<3.0.0", + "rapidfuzz_capi>=1.0.5,<2.0.0", ] build-backend = "setuptools.build_meta" diff --git a/setup.cfg b/setup.cfg index 658683df7..91c73cb5c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -34,6 +34,7 @@ python_requires = >=3.6 setup_requires = cython>=0.25,<3.0 numpy>=1.15.0 + rapidfuzz_capi>=1.0.5,<2.0.0 # We also need our Cython packages here to compile against cymem>=2.0.2,<2.1.0 preshed>=3.0.2,<3.1.0 diff --git a/spacy/matcher/matcher.pyx b/spacy/matcher/matcher.pyx index 17d965eaa..4a5468b98 100644 --- a/spacy/matcher/matcher.pyx +++ b/spacy/matcher/matcher.pyx @@ -20,7 +20,6 @@ from ..tokens.span cimport Span from ..tokens.token cimport Token from ..tokens.morphanalysis cimport MorphAnalysis from ..attrs cimport ID, attr_id_t, NULL_ATTR, ORTH, POS, TAG, DEP, LEMMA, MORPH, ENT_IOB -from ..attrs cimport LOWER, NORM from ..schemas import validate_token_pattern from ..errors import Errors, MatchPatternError, Warnings @@ -258,8 +257,7 @@ cdef class Matcher: matches = [] else: matches = find_matches(&self.patterns[0], self.patterns.size(), doclike, length, - extensions=self._extensions, predicates=self._extra_predicates, - with_alignments=with_alignments, + extensions=self._extensions, predicates=self._extra_predicates, with_alignments=with_alignments, fuzzy=self.fuzzy, fuzzy_attrs=self.fuzzy_attrs) final_matches = [] pairs_by_id = {} @@ -341,7 +339,8 @@ def unpickle_matcher(vocab, patterns, callbacks): return matcher -cdef find_matches(TokenPatternC** patterns, int n, object doclike, int length, extensions=None, predicates=tuple(), bint with_alignments=0, float fuzzy=0, list fuzzy_attrs=[]): +cdef find_matches(TokenPatternC** patterns, int n, object doclike, int length, extensions=None, predicates=tuple(), bint with_alignments=0, + float fuzzy=0, list fuzzy_attrs=[]): """Find matches in a doc, with a compiled array of patterns. Matches are returned as a list of (id, start, end) tuples or (id, start, end, alignments) tuples (if with_alignments != 0)