mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 17:24:41 +03:00
Check that prefix, suffix and infix patterns are set (not None or empty) before compiling the tokenizer regexes
This commit is contained in:
parent
5ac735df33
commit
22647c2423
|
@@ -62,9 +62,18 @@ class BaseDefaults(object):
|
||||||
@classmethod
|
@classmethod
|
||||||
def create_tokenizer(cls, nlp=None):
|
def create_tokenizer(cls, nlp=None):
|
||||||
rules = cls.tokenizer_exceptions
|
rules = cls.tokenizer_exceptions
|
||||||
prefix_search = util.compile_prefix_regex(cls.prefixes).search
|
if cls.prefixes:
|
||||||
suffix_search = util.compile_suffix_regex(cls.suffixes).search
|
prefix_search = util.compile_prefix_regex(cls.prefixes).search
|
||||||
infix_finditer = util.compile_infix_regex(cls.infixes).finditer
|
else:
|
||||||
|
prefix_search = None
|
||||||
|
if cls.suffixes:
|
||||||
|
suffix_search = util.compile_suffix_regex(cls.suffixes).search
|
||||||
|
else:
|
||||||
|
suffix_search = None
|
||||||
|
if cls.infixes:
|
||||||
|
infix_finditer = util.compile_infix_regex(cls.infixes).finditer
|
||||||
|
else:
|
||||||
|
infix_finditer = None
|
||||||
vocab = nlp.vocab if nlp is not None else cls.create_vocab(nlp)
|
vocab = nlp.vocab if nlp is not None else cls.create_vocab(nlp)
|
||||||
return Tokenizer(nlp.vocab, rules=rules,
|
return Tokenizer(nlp.vocab, rules=rules,
|
||||||
prefix_search=prefix_search, suffix_search=suffix_search,
|
prefix_search=prefix_search, suffix_search=suffix_search,
|
||||||
|
|
Loading…
Reference in New Issue
Block a user