* Add provisional tests for sentence boundary detection

This commit is contained in:
Matthew Honnibal 2015-01-31 13:46:11 +11:00
parent df749f257d
commit 0fd4a71bed

43
tests/test_sbd.py Normal file
View File

@ -0,0 +1,43 @@
from __future__ import unicode_literals
from spacy.en import English
import pytest
@pytest.fixture
def EN():
return English()
def test_single_period(EN):
string = 'A test sentence.'
words = EN(string)
assert len(words) == 4
assert list(words.sents) == [(0, 4)]
def test_single_no_period(EN):
string = 'A test sentence'
words = EN(string)
assert len(words) == 3
assert list(words.sents) == [(0, 3)]
def test_single_exclamation(EN):
string = 'A test sentence!'
words = EN(string)
assert len(words) == 4
assert list(words.sents) == [(0, 4)]
def test_single_question(EN):
string = 'A test sentence?'
words = EN(string, tag=False, parse=False)
assert len(words) == 4
assert list(words.sents) == [(0, 4)]
def test_(EN):
string = 'A test sentence?'
words = EN(string, tag=False, parse=False)
assert len(words) == 4
assert list(words.sents) == [(0, 4)]