This commit is contained in:
Kevin Humphreys 2022-08-26 02:06:05 +02:00
parent c017de997a
commit c03394810b
3 changed files with 6 additions and 4 deletions

View File

@@ -8,5 +8,7 @@ requires = [
"thinc>=8.1.0,<8.2.0",
"pathy",
"numpy>=1.15.0",
+"rapidfuzz>=2.4.0,<3.0.0",
+"rapidfuzz_capi>=1.0.5,<2.0.0",
]
build-backend = "setuptools.build_meta"

View File

@@ -34,6 +34,7 @@ python_requires = >=3.6
setup_requires =
cython>=0.25,<3.0
numpy>=1.15.0
+rapidfuzz_capi>=1.0.5,<2.0.0
# We also need our Cython packages here to compile against
cymem>=2.0.2,<2.1.0
preshed>=3.0.2,<3.1.0

View File

@@ -20,7 +20,6 @@ from ..tokens.span cimport Span
from ..tokens.token cimport Token
from ..tokens.morphanalysis cimport MorphAnalysis
from ..attrs cimport ID, attr_id_t, NULL_ATTR, ORTH, POS, TAG, DEP, LEMMA, MORPH, ENT_IOB
from ..attrs cimport LOWER, NORM
from ..schemas import validate_token_pattern
from ..errors import Errors, MatchPatternError, Warnings
@@ -258,8 +257,7 @@ cdef class Matcher:
matches = []
else:
matches = find_matches(&self.patterns[0], self.patterns.size(), doclike, length,
-extensions=self._extensions, predicates=self._extra_predicates,
-with_alignments=with_alignments,
+extensions=self._extensions, predicates=self._extra_predicates, with_alignments=with_alignments,
fuzzy=self.fuzzy, fuzzy_attrs=self.fuzzy_attrs)
final_matches = []
pairs_by_id = {}
@@ -341,7 +339,8 @@ def unpickle_matcher(vocab, patterns, callbacks):
return matcher
-cdef find_matches(TokenPatternC** patterns, int n, object doclike, int length, extensions=None, predicates=tuple(), bint with_alignments=0, float fuzzy=0, list fuzzy_attrs=[]):
+cdef find_matches(TokenPatternC** patterns, int n, object doclike, int length, extensions=None, predicates=tuple(), bint with_alignments=0,
+float fuzzy=0, list fuzzy_attrs=[]):
"""Find matches in a doc, with a compiled array of patterns. Matches are
returned as a list of (id, start, end) tuples or (id, start, end, alignments) tuples (if with_alignments != 0)