From b1fe41b45d6a36cbbabb203ddf0357fca1689265 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Tue, 29 Mar 2016 14:31:05 +1100
Subject: [PATCH] * Extend infix test, commenting on limitation of tokenizer w.r.t. infixes at the moment.

---
 spacy/tests/tokenizer/test_infix.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/spacy/tests/tokenizer/test_infix.py b/spacy/tests/tokenizer/test_infix.py
index eda4643a6..7a107733b 100644
--- a/spacy/tests/tokenizer/test_infix.py
+++ b/spacy/tests/tokenizer/test_infix.py
@@ -38,3 +38,12 @@ def test_double_hyphen(en_tokenizer):
     assert tokens[1].text == u'decent'
     assert tokens[2].text == u'--'
     assert tokens[3].text == u'let'
+    assert tokens[4].text == u'alone'
+    assert tokens[5].text == u'well'
+    assert tokens[6].text == u'-'
+    # TODO: This points to a deeper issue with the tokenizer: it doesn't re-enter
+    # on infixes.
+    #assert tokens[7].text == u'bred'
+    #assert tokens[8].text == u'--'
+    #assert tokens[9].text == u'people'
+