mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 17:24:41 +03:00
* Extend the infix test, commenting on the tokenizer's current limitation w.r.t. infixes.
This commit is contained in:
parent
9c73983bdd
commit
b1fe41b45d
|
@@ -38,3 +38,12 @@ def test_double_hyphen(en_tokenizer):
|
|||
assert tokens[1].text == u'decent'
|
||||
assert tokens[2].text == u'--'
|
||||
assert tokens[3].text == u'let'
|
||||
assert tokens[4].text == u'alone'
|
||||
assert tokens[5].text == u'well'
|
||||
assert tokens[6].text == u'-'
|
||||
# TODO: This points to a deeper issue with the tokenizer: it doesn't re-enter
|
||||
# on infixes.
|
||||
#assert tokens[7].text == u'bred'
|
||||
#assert tokens[8].text == u'--'
|
||||
#assert tokens[9].text == u'people'
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user