mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-11 04:08:09 +03:00
Added regression test for 1494
This commit is contained in:
parent
ada4712250
commit
8be3392302
27
spacy/tests/regression/test_issue1494.py
Normal file
27
spacy/tests/regression/test_issue1494.py
Normal file
|
@ -0,0 +1,27 @@
|
|||
# coding: utf8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import pytest
|
||||
import re
|
||||
|
||||
from ...lang.en import English
|
||||
from ...tokenizer import Tokenizer
|
||||
|
||||
|
||||
def test_issue1494():
    """Regression test for issue 1494.

    Installs a Tokenizer built from the English vocab, an empty dict as its
    second argument, and a custom ``infix_finditer`` on an English pipeline,
    then checks that 'token 123test' is split into 'token', the three
    individual digits, and 'test'.
    """
    # The infix pattern matches any single character outside a-z, so every
    # digit in '123test' is expected to become its own token.
    infix_re = re.compile(r'''[^a-z]''')
    text_to_tokenize = 'token 123test'
    expected_tokens = ['token', '1', '2', '3', 'test']

    nlp = English()
    # Swap the pipeline's tokenizer for one driven only by the custom infix
    # matcher (same arguments as before, without the one-use nested factory).
    nlp.tokenizer = Tokenizer(nlp.vocab, {},
                              infix_finditer=infix_re.finditer)

    tokenized_words = [token.text for token in nlp(text_to_tokenize)]
    # No debug print here: pytest shows both lists when the assert fails.
    assert tokenized_words == expected_tokens
|
Loading…
Reference in New Issue
Block a user