mirror of
https://github.com/explosion/spaCy.git
synced 2025-04-27 20:33:42 +03:00
Fix custom tokenizer example
This commit is contained in:
parent
706cec6d58
commit
5e60b09dcd
|
@@ -201,11 +201,12 @@ p
|
||||||
prefix_re = re.compile(r'''[\[\("']''')
|
prefix_re = re.compile(r'''[\[\("']''')
|
||||||
suffix_re = re.compile(r'''[\]\)"']''')
|
suffix_re = re.compile(r'''[\]\)"']''')
|
||||||
|
|
||||||
def create_tokenizer(nlp):
|
def custom_tokenizer(nlp):
|
||||||
return Tokenizer(nlp.vocab, prefix_search=prefix_re.search,
|
return Tokenizer(nlp.vocab, prefix_search=prefix_re.search,
|
||||||
suffix_search=suffix_re.search)
|
suffix_search=suffix_re.search)
|
||||||
|
|
||||||
nlp = spacy.load('en', tokenizer=create_tokenizer)
|
nlp = spacy.load('en')
|
||||||
|
nlp.tokenizer = custom_tokenizer(nlp)
|
||||||
|
|
||||||
p
|
p
|
||||||
| If you need to subclass the tokenizer instead, the relevant methods to
|
| If you need to subclass the tokenizer instead, the relevant methods to
|
||||||
|
|
Loading…
Reference in New Issue
Block a user