From 22647c2423f12c9caff5046b958170b219d82415 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Wed, 2 Nov 2016 20:35:29 +0100 Subject: [PATCH] Check that patterns aren't null before compiling regex for tokenizer --- spacy/language.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/spacy/language.py b/spacy/language.py index e373c7ce0..4681583c0 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -62,9 +62,18 @@ class BaseDefaults(object): @classmethod def create_tokenizer(cls, nlp=None): rules = cls.tokenizer_exceptions - prefix_search = util.compile_prefix_regex(cls.prefixes).search - suffix_search = util.compile_suffix_regex(cls.suffixes).search - infix_finditer = util.compile_infix_regex(cls.infixes).finditer + if cls.prefixes: + prefix_search = util.compile_prefix_regex(cls.prefixes).search + else: + prefix_search = None + if cls.suffixes: + suffix_search = util.compile_suffix_regex(cls.suffixes).search + else: + suffix_search = None + if cls.infixes: + infix_finditer = util.compile_infix_regex(cls.infixes).finditer + else: + infix_finditer = None vocab = nlp.vocab if nlp is not None else cls.create_vocab(nlp) return Tokenizer(nlp.vocab, rules=rules, prefix_search=prefix_search, suffix_search=suffix_search,