diff --git a/spacy/language_data/punctuation.py b/spacy/language_data/punctuation.py
index fbc93f313..f94d91e80 100644
--- a/spacy/language_data/punctuation.py
+++ b/spacy/language_data/punctuation.py
@@ -107,7 +107,7 @@ TOKENIZER_INFIXES = (
         r'(?<=[{al}])\.(?=[{au}])'.format(al=ALPHA_LOWER, au=ALPHA_UPPER),
         r'(?<=[{a}]),(?=[{a}])'.format(a=ALPHA),
         r'(?<=[{a}])[?";:=,.]*(?:{h})(?=[{a}])'.format(a=ALPHA, h=HYPHENS),
-        r'(?<=[{a}"])[:<>=](?=[{a}])'.format(a=ALPHA)
+        r'(?<=[{a}"])[:<>=/](?=[{a}])'.format(a=ALPHA)
     ]
 )
 
diff --git a/spacy/tests/regression/test_issue891.py b/spacy/tests/regression/test_issue891.py
index b1bec3d25..6e57a750f 100644
--- a/spacy/tests/regression/test_issue891.py
+++ b/spacy/tests/regression/test_issue891.py
@@ -4,7 +4,6 @@ from __future__ import unicode_literals
 import pytest
 
 
-@pytest.mark.xfail
 @pytest.mark.parametrize('text', ["want/need"])
 def test_issue891(en_tokenizer, text):
     """Test that / infixes are split correctly."""
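For reference, the behaviour this patch enables looks like the following. This is a minimal sketch, assuming a spaCy 1.x install (the era where `spacy/language_data` exists); `English.Defaults.create_tokenizer()` is assumed to mirror what the test suite's `en_tokenizer` fixture builds.

```python
# Minimal sketch, assuming spaCy 1.x. English.Defaults.create_tokenizer()
# builds a tokenizer from the language defaults without loading models;
# it is assumed to match the test suite's en_tokenizer fixture.
from spacy.en import English

tokenizer = English.Defaults.create_tokenizer()
tokens = tokenizer("want/need")

# With '/' added to the infix class [:<>=/], a slash between two
# alphabetic characters is now split off as its own token.
assert [t.text for t in tokens] == ["want", "/", "need"]
```

Because `/` now matches the infix rule, the previously xfail-marked regression test for issue #891 passes and the marker can be dropped.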