mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 01:46:28 +03:00
Add regression test for #801
This commit is contained in:
parent
16ce7409e4
commit
13a4ab37e0
20
spacy/tests/regression/test_issue801.py
Normal file
20
spacy/tests/regression/test_issue801.py
Normal file
|
@ -0,0 +1,20 @@
|
|||
# coding: utf-8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.mark.xfail
@pytest.mark.parametrize('text,tokens', [
    ('"deserve,"--and', ['"', "deserve", ",", '"', "--", "and"]),
    ("exception;--exclusive", ["exception", ";", "--", "exclusive"]),
    ("day.--Is", ["day", ".", "--", "Is"]),
    ("refinement:--just", ["refinement", ":", "--", "just"]),
    ("memories?--To", ["memories", "?", "--", "To"]),
    ("Useful.=--Therefore", ["Useful", ".", "=", "--", "Therefore"]),
    ("=Hope.=--Pandora", ["=", "Hope", ".", "=", "--", "Pandora"])])
def test_issue801(en_tokenizer, text, tokens):
    """Regression test for issue #801.

    Tokenizing text containing special characters adjacent to double
    hyphens ("--") should split each piece into its own token. Marked
    xfail: the tokenizer did not yet handle these cases when the test
    was added.
    """
    doc = en_tokenizer(text)
    expected = list(tokens)
    # Compare count first for a clearer failure, then the exact token texts.
    assert len(doc) == len(expected)
    assert [token.text for token in doc] == expected
|
Loading…
Reference in New Issue
Block a user