From 00b9011a49cd7a8b32153c556a3f66e6023dbbf1 Mon Sep 17 00:00:00 2001 From: ines Date: Fri, 7 Apr 2017 17:29:59 +0200 Subject: [PATCH 1/2] Fix whitespace --- spacy/tests/regression/test_issue758.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/spacy/tests/regression/test_issue758.py b/spacy/tests/regression/test_issue758.py index 2ddba9975..c0bbb6945 100644 --- a/spacy/tests/regression/test_issue758.py +++ b/spacy/tests/regression/test_issue758.py @@ -5,8 +5,6 @@ from ...matcher import merge_phrase import pytest - - @pytest.mark.models def test_issue758(): '''Test parser transition bug after label added.''' From bf0f15e762cbb2fd9d08db69ab6e3cb209130d65 Mon Sep 17 00:00:00 2001 From: ines Date: Fri, 7 Apr 2017 17:30:44 +0200 Subject: [PATCH 2/2] Add / to tokenizer infixes (resolves #891) --- spacy/language_data/punctuation.py | 2 +- spacy/tests/regression/test_issue891.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/spacy/language_data/punctuation.py b/spacy/language_data/punctuation.py index fbc93f313..f94d91e80 100644 --- a/spacy/language_data/punctuation.py +++ b/spacy/language_data/punctuation.py @@ -107,7 +107,7 @@ TOKENIZER_INFIXES = ( r'(?<=[{al}])\.(?=[{au}])'.format(al=ALPHA_LOWER, au=ALPHA_UPPER), r'(?<=[{a}]),(?=[{a}])'.format(a=ALPHA), r'(?<=[{a}])[?";:=,.]*(?:{h})(?=[{a}])'.format(a=ALPHA, h=HYPHENS), - r'(?<=[{a}"])[:<>=](?=[{a}])'.format(a=ALPHA) + r'(?<=[{a}"])[:<>=/](?=[{a}])'.format(a=ALPHA) ] ) diff --git a/spacy/tests/regression/test_issue891.py b/spacy/tests/regression/test_issue891.py index b1bec3d25..6e57a750f 100644 --- a/spacy/tests/regression/test_issue891.py +++ b/spacy/tests/regression/test_issue891.py @@ -4,7 +4,6 @@ from __future__ import unicode_literals import pytest -@pytest.mark.xfail @pytest.mark.parametrize('text', ["want/need"]) def test_issue891(en_tokenizer, text): """Test that / infixes are split correctly."""