spaCy/spacy/tests/regression/test_issue704.py

15 lines
516 B
Python
Raw Normal View History

# coding: utf8
from __future__ import unicode_literals
import pytest
@pytest.mark.models('en')
def test_issue704(EN):
    """Check the sentence count for a quoted, multi-sentence passage.

    Calls ``EN`` on the passage and asserts that iterating the result's
    ``sents`` yields exactly three items.
    """
    # NOTE(review): the passage reads as if apostrophes are missing
    # ("Id", "youll", "its") -- possibly lost in transit; confirm the
    # literal against the upstream test before relying on it.
    passage = '“Atticus said to Jem one day, “Id rather you shot at tin cans in the backyard, but I know youll go after birds. Shoot all the blue jays you want, if you can hit em, but remember its a sin to kill a mockingbird.”'
    parsed = EN(passage)
    # Materialise the sentence iterator so it can be counted.
    sentences = list(parsed.sents)
    assert len(sentences) == 3