mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 01:46:28 +03:00
Add / to tokenizer infixes (resolves #891)
This commit is contained in:
parent
00b9011a49
commit
bf0f15e762
|
@@ -107,7 +107,7 @@ TOKENIZER_INFIXES = (
|
||||||
r'(?<=[{al}])\.(?=[{au}])'.format(al=ALPHA_LOWER, au=ALPHA_UPPER),
|
r'(?<=[{al}])\.(?=[{au}])'.format(al=ALPHA_LOWER, au=ALPHA_UPPER),
|
||||||
r'(?<=[{a}]),(?=[{a}])'.format(a=ALPHA),
|
r'(?<=[{a}]),(?=[{a}])'.format(a=ALPHA),
|
||||||
r'(?<=[{a}])[?";:=,.]*(?:{h})(?=[{a}])'.format(a=ALPHA, h=HYPHENS),
|
r'(?<=[{a}])[?";:=,.]*(?:{h})(?=[{a}])'.format(a=ALPHA, h=HYPHENS),
|
||||||
r'(?<=[{a}"])[:<>=](?=[{a}])'.format(a=ALPHA)
|
r'(?<=[{a}"])[:<>=/](?=[{a}])'.format(a=ALPHA)
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
@@ -4,7 +4,6 @@ from __future__ import unicode_literals
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.xfail
|
|
||||||
@pytest.mark.parametrize('text', ["want/need"])
|
@pytest.mark.parametrize('text', ["want/need"])
|
||||||
def test_issue891(en_tokenizer, text):
|
def test_issue891(en_tokenizer, text):
|
||||||
"""Test that / infixes are split correctly."""
|
"""Test that / infixes are split correctly."""
|
||||||
|
|
Loading…
Reference in New Issue
Block a user