mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-27 10:26:35 +03:00
c31a9dabd5
* Auto-format * Add en/em dash to prefixes and suffixes
12 lines
376 B
Python
12 lines
376 B
Python
# coding: utf-8
|
||
from __future__ import unicode_literals
|
||
|
||
|
||
def test_issue3277(es_tokenizer):
    """Regression test for issue 3277: em and en dashes attached to the
    start or end of a word come out as separate tokens.

    es_tokenizer: tokenizer fixture; called with the sample sentence, it
        must return an indexable sequence whose items expose ``.text``.
    """
    em_dash, en_dash = "\u2014", "\u2013"
    tokens = es_tokenizer("—Yo me llamo... –murmuró el niño– Emilio Sánchez Pérez.")
    # The total count guards against dashes staying glued to their neighbours.
    assert len(tokens) == 14
    # Token index -> dash expected there: the leading em dash, then the
    # en dash opening and the en dash closing the interjection.
    expected_dashes = ((0, em_dash), (5, en_dash), (9, en_dash))
    for position, dash in expected_dashes:
        assert tokens[position].text == dash
|