diff --git a/website/docs/usage/customizing-tokenizer.jade b/website/docs/usage/customizing-tokenizer.jade index 05a16fc24..5c9a9fd78 100644 --- a/website/docs/usage/customizing-tokenizer.jade +++ b/website/docs/usage/customizing-tokenizer.jade @@ -201,11 +201,12 @@ p prefix_re = re.compile(r'''[\[\("']''') suffix_re = re.compile(r'''[\]\)"']''') - def create_tokenizer(nlp): + def custom_tokenizer(nlp): return Tokenizer(nlp.vocab, prefix_search=prefix_re.search, suffix_search=suffix_re.search) - nlp = spacy.load('en', tokenizer=create_tokenizer) + nlp = spacy.load('en') + nlp.tokenizer = custom_tokenizer(nlp) p | If you need to subclass the tokenizer instead, the relevant methods to