Fix custom tokenizer example

ines 2017-06-01 13:02:50 +02:00
parent 706cec6d58
commit 5e60b09dcd


@@ -201,11 +201,12 @@ p
     prefix_re = re.compile(r'''[\[\("']''')
     suffix_re = re.compile(r'''[\]\)"']''')
-    def create_tokenizer(nlp):
+    def custom_tokenizer(nlp):
         return Tokenizer(nlp.vocab, prefix_search=prefix_re.search,
                          suffix_search=suffix_re.search)
-    nlp = spacy.load('en', tokenizer=create_tokenizer)
+    nlp = spacy.load('en')
+    nlp.tokenizer = custom_tokenizer(nlp)
 p
     | If you need to subclass the tokenizer instead, the relevant methods to