Add regression test for #704 and test new model (resolves #704)

(using new English model)
This commit is contained in:
ines 2017-03-18 16:04:14 +01:00
parent ff277140f9
commit f57c616830

View File

@ -0,0 +1,14 @@
# coding: utf8
from __future__ import unicode_literals
import pytest
@pytest.mark.models
def test_issue704(EN):
    """Regression test for issue #704: sentence boundaries in a quoted passage.

    Feeds a passage wrapped in curly quotation marks to ``EN`` (presumably
    the English model fixture -- confirm against conftest) and checks that
    iterating ``doc.sents`` yields exactly three sentences.
    """
    # NOTE(review): the contractions in this passage ("Id", "youll", "em",
    # "its") carry no apostrophes -- possibly lost in transit; confirm
    # against the upstream test before relying on the exact text.
    passage = '“Atticus said to Jem one day, “Id rather you shot at tin cans in the backyard, but I know youll go after birds. Shoot all the blue jays you want, if you can hit em, but remember its a sin to kill a mockingbird.”'
    parsed = EN(passage)
    sentences = list(parsed.sents)
    assert len(sentences) == 3