Mirror of https://github.com/explosion/spaCy.git, synced 2025-01-12 18:26:30 +03:00.
Add test for #1250: Tokenizer cache clobbered special-case attrs
This commit is contained in:
parent
18f1c1d0ba
commit
63f0bde749
13
spacy/tests/regression/test_issue1250.py
Normal file
13
spacy/tests/regression/test_issue1250.py
Normal file
|
@ -0,0 +1,13 @@
|
|||
# Regression test for spaCy issue #1250: the tokenizer cache clobbered
# attributes (e.g. LEMMA, POS) that were set via special-case rules, so a
# second pass over already-cached text lost the custom values.
from __future__ import unicode_literals

from ...tokenizer import Tokenizer
from ...symbols import ORTH, LEMMA, POS
from ...lang.en import English


def test_issue1250_cached_special_cases():
    """Special-case attributes must survive tokenizer cache hits.

    Processing the same text twice exercises the cache: the first call
    populates it, the second call reads from it. Both passes must yield
    the lemma assigned by the special-case rule.
    """
    nlp = English()
    # Map the exact token 'reimbur' to a custom lemma and POS tag.
    nlp.tokenizer.add_special_case(
        u'reimbur', [{ORTH: u'reimbur', LEMMA: u'reimburse', POS: u'VERB'}]
    )

    expected = ['reimburse', ',', 'reimburse', '...']
    # First pass fills the tokenizer cache.
    assert [w.lemma_ for w in nlp(u'reimbur, reimbur...')] == expected
    # Second pass hits the cache; the special-case attrs must not be
    # clobbered (this was the bug in issue #1250).
    assert [w.lemma_ for w in nlp(u'reimbur, reimbur...')] == expected
|
Loading…
Reference in New Issue
Block a user