mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-28 02:04:07 +03:00
Switch to PhraseMatcher.find_matches
This commit is contained in:
parent
63b014d09f
commit
ae348bee43
|
@ -8,6 +8,7 @@ from .structs cimport LexemeC, TokenC
|
||||||
from .strings cimport StringStore
|
from .strings cimport StringStore
|
||||||
from .tokens.doc cimport Doc
|
from .tokens.doc cimport Doc
|
||||||
from .vocab cimport Vocab, LexemesOrTokens, _Cached
|
from .vocab cimport Vocab, LexemesOrTokens, _Cached
|
||||||
|
from .matcher.phrasematcher cimport PhraseMatcher, MatchStruct
|
||||||
|
|
||||||
|
|
||||||
cdef class Tokenizer:
|
cdef class Tokenizer:
|
||||||
|
@ -21,7 +22,7 @@ cdef class Tokenizer:
|
||||||
cdef object _suffix_search
|
cdef object _suffix_search
|
||||||
cdef object _infix_finditer
|
cdef object _infix_finditer
|
||||||
cdef object _rules
|
cdef object _rules
|
||||||
cdef object _special_matcher
|
cdef PhraseMatcher _special_matcher
|
||||||
cdef int _property_init_count
|
cdef int _property_init_count
|
||||||
cdef int _property_init_max
|
cdef int _property_init_max
|
||||||
|
|
||||||
|
|
|
@ -22,7 +22,6 @@ from . import util
|
||||||
|
|
||||||
from .attrs import intify_attrs
|
from .attrs import intify_attrs
|
||||||
from .lexeme cimport EMPTY_LEXEME
|
from .lexeme cimport EMPTY_LEXEME
|
||||||
from .matcher import PhraseMatcher
|
|
||||||
from .symbols import ORTH
|
from .symbols import ORTH
|
||||||
|
|
||||||
cdef class Tokenizer:
|
cdef class Tokenizer:
|
||||||
|
@ -242,10 +241,12 @@ cdef class Tokenizer:
|
||||||
cdef int orig_final_spacy
|
cdef int orig_final_spacy
|
||||||
cdef int orig_idx
|
cdef int orig_idx
|
||||||
cdef Pool mem = Pool()
|
cdef Pool mem = Pool()
|
||||||
spans = [doc[match[1]:match[2]] for match in self._special_matcher(doc)]
|
cdef vector[MatchStruct] c_matches
|
||||||
|
self._special_matcher.find_matches(doc, &c_matches)
|
||||||
# Skip processing if no matches
|
# Skip processing if no matches
|
||||||
if len(spans) == 0:
|
if c_matches.size() == 0:
|
||||||
return True
|
return True
|
||||||
|
spans = [doc[match.start:match.end] for match in c_matches]
|
||||||
spans = util.filter_spans(spans)
|
spans = util.filter_spans(spans)
|
||||||
# Put span info in span.start-indexed dict and calculate maximum
|
# Put span info in span.start-indexed dict and calculate maximum
|
||||||
# intermediate document size
|
# intermediate document size
|
||||||
|
|
Loading…
Reference in New Issue
Block a user