mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 01:16:28 +03:00
Add / to tokenizer infixes (resolves #891)
This commit is contained in:
parent
00b9011a49
commit
bf0f15e762
|
@ -107,7 +107,7 @@ TOKENIZER_INFIXES = (
|
|||
r'(?<=[{al}])\.(?=[{au}])'.format(al=ALPHA_LOWER, au=ALPHA_UPPER),
|
||||
r'(?<=[{a}]),(?=[{a}])'.format(a=ALPHA),
|
||||
r'(?<=[{a}])[?";:=,.]*(?:{h})(?=[{a}])'.format(a=ALPHA, h=HYPHENS),
|
||||
r'(?<=[{a}"])[:<>=](?=[{a}])'.format(a=ALPHA)
|
||||
r'(?<=[{a}"])[:<>=/](?=[{a}])'.format(a=ALPHA)
|
||||
]
|
||||
)
|
||||
|
||||
|
|
|
@ -4,7 +4,6 @@ from __future__ import unicode_literals
|
|||
import pytest
|
||||
|
||||
|
||||
@pytest.mark.xfail
|
||||
@pytest.mark.parametrize('text', ["want/need"])
|
||||
def test_issue891(en_tokenizer, text):
|
||||
"""Test that / infixes are split correctly."""
|
||||
|
|
Loading…
Reference in New Issue
Block a user