Check that patterns aren't null before compiling regex for tokenizer

This commit is contained in:
Matthew Honnibal 2016-11-02 20:35:29 +01:00
parent 5ac735df33
commit 22647c2423

View File

@@ -62,9 +62,18 @@ class BaseDefaults(object):
     @classmethod
     def create_tokenizer(cls, nlp=None):
         rules = cls.tokenizer_exceptions
-        prefix_search = util.compile_prefix_regex(cls.prefixes).search
-        suffix_search = util.compile_suffix_regex(cls.suffixes).search
-        infix_finditer = util.compile_infix_regex(cls.infixes).finditer
+        if cls.prefixes:
+            prefix_search = util.compile_prefix_regex(cls.prefixes).search
+        else:
+            prefix_search = None
+        if cls.suffixes:
+            suffix_search = util.compile_suffix_regex(cls.suffixes).search
+        else:
+            suffix_search = None
+        if cls.infixes:
+            infix_finditer = util.compile_infix_regex(cls.infixes).finditer
+        else:
+            infix_finditer = None
         vocab = nlp.vocab if nlp is not None else cls.create_vocab(nlp)
         return Tokenizer(nlp.vocab, rules=rules,
             prefix_search=prefix_search, suffix_search=suffix_search,