Add / to tokenizer infixes (resolves #891)

This commit is contained in:
ines 2017-04-07 17:30:44 +02:00
parent 00b9011a49
commit bf0f15e762
2 changed files with 1 addition and 2 deletions

View File

@@ -107,7 +107,7 @@ TOKENIZER_INFIXES = (
r'(?<=[{al}])\.(?=[{au}])'.format(al=ALPHA_LOWER, au=ALPHA_UPPER), r'(?<=[{al}])\.(?=[{au}])'.format(al=ALPHA_LOWER, au=ALPHA_UPPER),
r'(?<=[{a}]),(?=[{a}])'.format(a=ALPHA), r'(?<=[{a}]),(?=[{a}])'.format(a=ALPHA),
r'(?<=[{a}])[?";:=,.]*(?:{h})(?=[{a}])'.format(a=ALPHA, h=HYPHENS), r'(?<=[{a}])[?";:=,.]*(?:{h})(?=[{a}])'.format(a=ALPHA, h=HYPHENS),
r'(?<=[{a}"])[:<>=](?=[{a}])'.format(a=ALPHA) r'(?<=[{a}"])[:<>=/](?=[{a}])'.format(a=ALPHA)
] ]
) )

View File

@@ -4,7 +4,6 @@ from __future__ import unicode_literals
import pytest import pytest
@pytest.mark.xfail
@pytest.mark.parametrize('text', ["want/need"]) @pytest.mark.parametrize('text', ["want/need"])
def test_issue891(en_tokenizer, text): def test_issue891(en_tokenizer, text):
"""Test that / infixes are split correctly.""" """Test that / infixes are split correctly."""