mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-13 18:56:36 +03:00
Switch to PhraseMatcher.find_matches
This commit is contained in:
parent
63b014d09f
commit
ae348bee43
|
@@ -8,6 +8,7 @@ from .structs cimport LexemeC, TokenC
|
|||
from .strings cimport StringStore
|
||||
from .tokens.doc cimport Doc
|
||||
from .vocab cimport Vocab, LexemesOrTokens, _Cached
|
||||
from .matcher.phrasematcher cimport PhraseMatcher, MatchStruct
|
||||
|
||||
|
||||
cdef class Tokenizer:
|
||||
|
@@ -21,7 +22,7 @@ cdef class Tokenizer:
|
|||
cdef object _suffix_search
|
||||
cdef object _infix_finditer
|
||||
cdef object _rules
|
||||
cdef object _special_matcher
|
||||
cdef PhraseMatcher _special_matcher
|
||||
cdef int _property_init_count
|
||||
cdef int _property_init_max
|
||||
|
||||
|
|
|
@@ -22,7 +22,6 @@ from . import util
|
|||
|
||||
from .attrs import intify_attrs
|
||||
from .lexeme cimport EMPTY_LEXEME
|
||||
from .matcher import PhraseMatcher
|
||||
from .symbols import ORTH
|
||||
|
||||
cdef class Tokenizer:
|
||||
|
@@ -242,10 +241,12 @@ cdef class Tokenizer:
|
|||
cdef int orig_final_spacy
|
||||
cdef int orig_idx
|
||||
cdef Pool mem = Pool()
|
||||
spans = [doc[match[1]:match[2]] for match in self._special_matcher(doc)]
|
||||
cdef vector[MatchStruct] c_matches
|
||||
self._special_matcher.find_matches(doc, &c_matches)
|
||||
# Skip processing if no matches
|
||||
if len(spans) == 0:
|
||||
if c_matches.size() == 0:
|
||||
return True
|
||||
spans = [doc[match.start:match.end] for match in c_matches]
|
||||
spans = util.filter_spans(spans)
|
||||
# Put span info in span.start-indexed dict and calculate maximum
|
||||
# intermediate document size
|
||||
|
|
Loading…
Reference in New Issue
Block a user