From 0fd4a71bed301df8c169c85e36952eceffb3e312 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Sat, 31 Jan 2015 13:46:11 +1100
Subject: [PATCH] * Add provisional tests for sentence boundary detection

---
Review notes (free text after the three-dash line; not part of the commit):

  * Each test feeds one short string through the EN fixture, checks the
    token count with len(words), and checks that list(words.sents) is a
    single (start, end) pair covering every token.
  * test_ has no descriptive name and its body is identical to
    test_single_question, so it adds no coverage. It looks like a
    placeholder: give it its own input and name, or drop it.
  * test_single_period, test_single_no_period and test_single_exclamation
    call EN(string) with default arguments, while test_single_question and
    test_ pass tag=False, parse=False. TODO confirm which form is intended
    for sentence boundary tests.
  * The EN fixture is function-scoped, so English() is constructed once per
    test. If construction is costly, consider
    @pytest.fixture(scope="module") -- TODO confirm that sharing one
    instance between tests is safe.
  * NOTE(review): the From: header carries no e-mail address; some mail
    based tooling may reject the patch until one is supplied.
  * NOTE(review): indentation inside the added file is assumed to be four
    spaces -- confirm against blob 6b1e907da.

 tests/test_sbd.py | 43 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)
 create mode 100644 tests/test_sbd.py

diff --git a/tests/test_sbd.py b/tests/test_sbd.py
new file mode 100644
index 000000000..6b1e907da
--- /dev/null
+++ b/tests/test_sbd.py
@@ -0,0 +1,43 @@
+from __future__ import unicode_literals
+from spacy.en import English
+
+import pytest
+
+@pytest.fixture
+def EN():
+    return English()
+
+
+def test_single_period(EN):
+    string = 'A test sentence.'
+    words = EN(string)
+    assert len(words) == 4
+    assert list(words.sents) == [(0, 4)]
+
+
+def test_single_no_period(EN):
+    string = 'A test sentence'
+    words = EN(string)
+    assert len(words) == 3
+    assert list(words.sents) == [(0, 3)]
+
+
+def test_single_exclamation(EN):
+    string = 'A test sentence!'
+    words = EN(string)
+    assert len(words) == 4
+    assert list(words.sents) == [(0, 4)]
+
+
+def test_single_question(EN):
+    string = 'A test sentence?'
+    words = EN(string, tag=False, parse=False)
+    assert len(words) == 4
+    assert list(words.sents) == [(0, 4)]
+
+
+def test_(EN):
+    string = 'A test sentence?'
+    words = EN(string, tag=False, parse=False)
+    assert len(words) == 4
+    assert list(words.sents) == [(0, 4)]