From 08e29519a62900d6ed6efe52f73476606aab163d Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Sat, 10 Oct 2015 16:03:13 +1100
Subject: [PATCH 1/4] * Add test for how spaces are attached by the parser.

---
Notes:
    Adds one pytest test, marked `pytest.mark.models` and taking the `EN`
    fixture. It runs a two-sentence string containing a newline through
    `EN` and asserts that every token with `is_space` set has the
    immediately preceding token as its head (`head.i == i - 1`).

 tests/parser/test_space_attachment.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)
 create mode 100644 tests/parser/test_space_attachment.py

diff --git a/tests/parser/test_space_attachment.py b/tests/parser/test_space_attachment.py
new file mode 100644
index 000000000..73cf22cea
--- /dev/null
+++ b/tests/parser/test_space_attachment.py
@@ -0,0 +1,12 @@
+from __future__ import unicode_literals
+
+import pytest
+
+@pytest.mark.models
+def test_space_attachment(EN):
+    sentence = 'This is a test.\nTo ensure spaces are attached well.'
+    doc = EN(sentence)
+
+    for word in doc:
+        if word.is_space:
+            assert word.head.i == (word.i - 1)

From 693dd065479481ba4296fa932b913315b3a40b8d Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Sun, 11 Oct 2015 14:29:12 +1100
Subject: [PATCH 2/4] * Add basic, non-data dependent class creation tests, without depending on pytest.
 For use in debugging MS build issues, for Issue #132

---
Notes:
    Adds a unittest-based module that constructs StringStore, Morphology,
    Vocab, Tokenizer, Tagger, Parser and Matcher objects.

    Review observations on this patch as submitted:
    - TestMatcher.test_create references `Matcher`, but this patch does
      not import it; the import arrives in PATCH 3/4.
    - TestMorphology.test_create assigns `Lemmatizer({}, {}, {})` to
      `lemmatizer` twice; the first instance is overwritten before use.
    - TestVocab.test_get_lexeme uses a bare `assert`, while the other
      test cases use the unittest `self.assert*` methods.

 tests/test_basic_create.py | 88 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 88 insertions(+)
 create mode 100644 tests/test_basic_create.py

diff --git a/tests/test_basic_create.py b/tests/test_basic_create.py
new file mode 100644
index 000000000..c170403fa
--- /dev/null
+++ b/tests/test_basic_create.py
@@ -0,0 +1,88 @@
+"""Some quick tests that don't depend on data files or on pytest, for debugging the
+MS windows build issues."""
+from __future__ import print_function, unicode_literals
+
+import unittest
+import re
+
+from spacy.lemmatizer import Lemmatizer
+from spacy.morphology import Morphology
+from spacy.strings import StringStore
+from spacy.vocab import Vocab
+from spacy.tokenizer import Tokenizer
+from spacy.syntax.arc_eager import ArcEager
+from spacy._ml import Model
+from spacy.tagger import Tagger
+from spacy.syntax.parser import Parser
+
+
+class TestStringStore(unittest.TestCase):
+    def test_encode_decode(self):
+        strings = StringStore()
+        hello_id = strings[u'Hello']
+        world_id = strings[u'World']
+
+        self.assertNotEqual(hello_id, world_id)
+
+        self.assertEqual(strings[hello_id], u'Hello')
+        self.assertEqual(strings[world_id], u'World')
+
+        self.assertEqual(strings[u'Hello'], hello_id)
+        self.assertEqual(strings[u'World'], world_id)
+
+
+class TestMorphology(unittest.TestCase):
+    def test_create(self):
+        lemmatizer = Lemmatizer({}, {}, {})
+        strings = StringStore()
+        lemmatizer = Lemmatizer({}, {}, {})
+        morphology = Morphology(strings, {}, lemmatizer)
+
+
+class TestVocab(unittest.TestCase):
+    def test_create(self):
+        vocab = Vocab()
+
+    def test_get_lexeme(self):
+        vocab = Vocab()
+        lexeme = vocab[u'Hello']
+        assert lexeme.orth_ == u'Hello'
+
+
+class TestTokenizer(unittest.TestCase):
+    def test_create(self):
+        vocab = Vocab()
+        dummy_re = re.compile(r'sklfb;s')
+        tokenizer = Tokenizer(vocab, {}, dummy_re, dummy_re, dummy_re)
+        doc = tokenizer(u'I am a document.')
+
+        self.assertEqual(len(doc), 4)
+
+
+class TestTagger(unittest.TestCase):
+    def test_create(self):
+        vocab = Vocab()
+        templates = ((1,),)
+        model = Model(vocab.morphology.n_tags, templates, model_loc=None)
+        tagger = Tagger(vocab, model)
+
+
+class TestParser(unittest.TestCase):
+    def test_create(self):
+        vocab = Vocab()
+        templates = ((1,),)
+        labels_by_action = {0: ['One', 'Two'], 1: ['Two', 'Three']}
+        transition_system = ArcEager(vocab.strings, labels_by_action)
+        model = Model(vocab.morphology.n_tags, templates, model_loc=None)
+
+        parser = Parser(vocab.strings, transition_system, model)
+
+
+class TestMatcher(unittest.TestCase):
+    def test_create(self):
+        vocab = Vocab()
+        matcher = Matcher(vocab, [])
+
+
+if __name__ == '__main__':
+    unittest.main()

From 1f8f81f0c8e8607da9c54d990f680060308d6a9e Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Sun, 11 Oct 2015 14:38:21 +1100
Subject: [PATCH 3/4] * Fix missing import

---
Notes:
    Adds `from spacy.matcher import Matcher`, the name used by
    TestMatcher.test_create in tests/test_basic_create.py.

 tests/test_basic_create.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/test_basic_create.py b/tests/test_basic_create.py
index c170403fa..15e8f353f 100644
--- a/tests/test_basic_create.py
+++ b/tests/test_basic_create.py
@@ -14,6 +14,7 @@ from spacy.syntax.arc_eager import ArcEager
 from spacy._ml import Model
 from spacy.tagger import Tagger
 from spacy.syntax.parser import Parser
+from spacy.matcher import Matcher
 
 
 class TestStringStore(unittest.TestCase):

From cc92f3f0ed6f7456d300d2a581df1542accfed08 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Sun, 11 Oct 2015 14:59:12 +1100
Subject: [PATCH 4/4] * Fix Matcher test

---
Notes:
    Changes the second argument passed to `Matcher` in
    TestMatcher.test_create from an empty list to an empty dict.

 tests/test_basic_create.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_basic_create.py b/tests/test_basic_create.py
index 15e8f353f..d31a35bed 100644
--- a/tests/test_basic_create.py
+++ b/tests/test_basic_create.py
@@ -82,7 +82,7 @@ class TestParser(unittest.TestCase):
 class TestMatcher(unittest.TestCase):
     def test_create(self):
         vocab = Vocab()
-        matcher = Matcher(vocab, [])
+        matcher = Matcher(vocab, {})
 
 
 if __name__ == '__main__':