From 693dd065479481ba4296fa932b913315b3a40b8d Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal@gmail.com>
Date: Sun, 11 Oct 2015 14:29:12 +1100
Subject: [PATCH 1/8] * Add basic, non-data dependent class creation tests,
 without depending on pytest. For use in debugging MS build issues, for Issue
 #132

---
 tests/test_basic_create.py | 88 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 88 insertions(+)
 create mode 100644 tests/test_basic_create.py

diff --git a/tests/test_basic_create.py b/tests/test_basic_create.py
new file mode 100644
index 000000000..c170403fa
--- /dev/null
+++ b/tests/test_basic_create.py
@@ -0,0 +1,88 @@
+"""Some quick tests that don't depend on data files or on pytest, for debugging the
+MS Windows build issues."""
+from __future__ import print_function, unicode_literals
+
+import unittest
+import re
+
+from spacy.lemmatizer import Lemmatizer
+from spacy.morphology import Morphology
+from spacy.strings import StringStore
+from spacy.vocab import Vocab
+from spacy.tokenizer import Tokenizer
+from spacy.syntax.arc_eager import ArcEager
+from spacy._ml import Model
+from spacy.tagger import Tagger
+from spacy.syntax.parser import Parser
+
+
+class TestStringStore(unittest.TestCase):
+    def test_encode_decode(self):
+        # Round trip: string -> id -> string, and the same string maps back to the same id.
+        strings = StringStore()
+        hello_id = strings[u'Hello']
+        world_id = strings[u'World']
+        self.assertNotEqual(hello_id, world_id)
+
+        self.assertEqual(strings[hello_id], u'Hello')
+        self.assertEqual(strings[world_id], u'World')
+
+        self.assertEqual(strings[u'Hello'], hello_id)
+        self.assertEqual(strings[u'World'], world_id)
+
+
+class TestMorphology(unittest.TestCase):
+    def test_create(self):
+        # Smoke test: building a Morphology from empty tables should not raise.
+        strings = StringStore()
+        lemmatizer = Lemmatizer({}, {}, {})
+        morphology = Morphology(strings, {}, lemmatizer)
+
+
+class TestVocab(unittest.TestCase):
+    def test_create(self):
+        vocab = Vocab()
+
+    def test_get_lexeme(self):
+        vocab = Vocab()
+        lexeme = vocab[u'Hello']
+        assert lexeme.orth_ == u'Hello'
+
+
+class TestTokenizer(unittest.TestCase):
+    def test_create(self):
+        # dummy_re matches nothing here; expect 4 tokens (presumably a whitespace split).
+        vocab = Vocab()
+        dummy_re = re.compile(r'sklfb;s')
+        tokenizer = Tokenizer(vocab, {}, dummy_re, dummy_re, dummy_re)
+        doc = tokenizer(u'I am a document.')
+        self.assertEqual(len(doc), 4)
+
+
+class TestTagger(unittest.TestCase):
+    def test_create(self):
+        vocab = Vocab()
+        templates = ((1,),)
+        model = Model(vocab.morphology.n_tags, templates, model_loc=None)
+        tagger = Tagger(vocab, model)
+
+
+class TestParser(unittest.TestCase):
+    def test_create(self):
+        # The parser model scores transitions, so size it by n_moves rather than n_tags.
+        vocab = Vocab()
+        templates = ((1,),)
+        labels_by_action = {0: ['One', 'Two'], 1: ['Two', 'Three']}
+        transition_system = ArcEager(vocab.strings, labels_by_action)
+        model = Model(transition_system.n_moves, templates, model_loc=None)
+        parser = Parser(vocab.strings, transition_system, model)
+
+
+class TestMatcher(unittest.TestCase):
+    def test_create(self):
+        vocab = Vocab()
+        matcher = Matcher(vocab, [])
+
+
+if __name__ == '__main__':
+    unittest.main()

From 1f8f81f0c8e8607da9c54d990f680060308d6a9e Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal@gmail.com>
Date: Sun, 11 Oct 2015 14:38:21 +1100
Subject: [PATCH 2/8] * Fix missing import

---
 tests/test_basic_create.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/test_basic_create.py b/tests/test_basic_create.py
index c170403fa..15e8f353f 100644
--- a/tests/test_basic_create.py
+++ b/tests/test_basic_create.py
@@ -14,6 +14,7 @@ from spacy.syntax.arc_eager import ArcEager
 from spacy._ml import Model
 from spacy.tagger import Tagger
 from spacy.syntax.parser import Parser
+from spacy.matcher import Matcher
 
 
 class TestStringStore(unittest.TestCase):

From cc92f3f0ed6f7456d300d2a581df1542accfed08 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal@gmail.com>
Date: Sun, 11 Oct 2015 14:59:12 +1100
Subject: [PATCH 3/8] * Fix Matcher test

---
 tests/test_basic_create.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_basic_create.py b/tests/test_basic_create.py
index 15e8f353f..d31a35bed 100644
--- a/tests/test_basic_create.py
+++ b/tests/test_basic_create.py
@@ -82,7 +82,7 @@ class TestParser(unittest.TestCase):
 class TestMatcher(unittest.TestCase):
     def test_create(self):
         vocab = Vocab()
-        matcher = Matcher(vocab, [])
+        matcher = Matcher(vocab, {})
 
 
 if __name__ == '__main__':

From dba1daf5973a98c3c2472b96727257ec5e1bc08f Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal@gmail.com>
Date: Sun, 11 Oct 2015 19:46:53 +1100
Subject: [PATCH 4/8] * Add script to test loading different components

---
 tests/test_basic_load.py | 50 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 50 insertions(+)
 create mode 100644 tests/test_basic_load.py

diff --git a/tests/test_basic_load.py b/tests/test_basic_load.py
new file mode 100644
index 000000000..74e3e1ec7
--- /dev/null
+++ b/tests/test_basic_load.py
@@ -0,0 +1,50 @@
+"""Some quick tests that load the default English data files without depending on
+pytest, for debugging the MS Windows build issues."""
+from __future__ import print_function, unicode_literals
+
+import unittest
+import re
+from os import path
+
+from spacy.lemmatizer import Lemmatizer
+from spacy.morphology import Morphology
+from spacy.strings import StringStore
+from spacy.vocab import Vocab
+from spacy.tokenizer import Tokenizer
+from spacy.syntax.arc_eager import ArcEager
+from spacy._ml import Model
+from spacy.tagger import Tagger
+from spacy.syntax.parser import Parser
+from spacy.matcher import Matcher
+
+from spacy.en import English
+
+
+class TestLoadVocab(unittest.TestCase):
+    def test_load(self):
+        vocab = Vocab.from_dir(path.join(English.default_data_dir(), 'vocab'))
+
+
+class TestLoadTokenizer(unittest.TestCase):
+    def test_load(self):
+        data_dir = English.default_data_dir()
+        vocab = Vocab.from_dir(path.join(data_dir, 'vocab'))
+        tokenizer = Tokenizer.from_dir(vocab, path.join(data_dir, 'tokenizer'))
+
+
+class TestLoadTagger(unittest.TestCase):
+    def test_load(self):
+        data_dir = English.default_data_dir()
+        vocab = Vocab.from_dir(path.join(data_dir, 'vocab'))
+        tagger = Tagger.from_dir(path.join(data_dir, 'tagger'), vocab)
+
+
+class TestLoadParser(unittest.TestCase):
+    def test_load(self):
+        data_dir = English.default_data_dir()
+        vocab = Vocab.from_dir(path.join(data_dir, 'vocab'))
+        parser = Parser.from_dir(path.join(data_dir, 'deps'), vocab.strings, ArcEager)
+
+
+if __name__ == '__main__':
+    unittest.main()

From afec8cac200af61e5a058fee902d8c1788dbfb8e Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal@gmail.com>
Date: Sun, 11 Oct 2015 22:40:04 +1100
Subject: [PATCH 5/8] * Add more tests to probe mingw32 failure

---
 tests/test_basic_load.py | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/tests/test_basic_load.py b/tests/test_basic_load.py
index 74e3e1ec7..eb7adbe97 100644
--- a/tests/test_basic_load.py
+++ b/tests/test_basic_load.py
@@ -16,9 +16,12 @@ from spacy._ml import Model
 from spacy.tagger import Tagger
 from spacy.syntax.parser import Parser
 from spacy.matcher import Matcher
+from spacy.syntax.parser import get_templates
 
 from spacy.en import English
 
+from thinc.learner import LinearModel
+
 
 class TestLoadVocab(unittest.TestCase):
     def test_load(self):
@@ -45,6 +48,28 @@ class TestLoadParser(unittest.TestCase):
         vocab = Vocab.from_dir(path.join(data_dir, 'vocab'))
         parser = Parser.from_dir(path.join(data_dir, 'deps'), vocab.strings, ArcEager)
 
+    def test_load_careful(self):
+        # Assembles the parser piece by piece from an inline config instead of Parser.from_dir.
+        config_data = {"labels": {"0": {"": True}, "1": {"": True}, "2": {"cc": True, "agent": True, "ccomp": True, "prt": True, "meta": True, "nsubjpass": True, "csubj": True, "conj": True, "dobj": True, "neg": True, "csubjpass": True, "mark": True, "auxpass": True, "advcl": True, "aux": True, "ROOT": True, "prep": True, "parataxis": True, "xcomp": True, "nsubj": True, "nummod": True, "advmod": True, "punct": True, "relcl": True, "quantmod": True, "acomp": True, "compound": True, "pcomp": True, "intj": True, "poss": True, "npadvmod": True, "case": True, "attr": True, "dep": True, "appos": True, "det": True, "nmod": True, "amod": True, "dative": True, "pobj": True, "expl": True, "predet": True, "preconj": True, "oprd": True, "acl": True}, "3": {"cc": True, "agent": True, "ccomp": True, "prt": True, "meta": True, "nsubjpass": True, "csubj": True, "conj": True, "acl": True, "poss": True, "neg": True, "mark": True, "auxpass": True, "advcl": True, "aux": True, "amod": True, "ROOT": True, "prep": True, "parataxis": True, "xcomp": True, "nsubj": True, "nummod": True, "advmod": True, "punct": True, "quantmod": True, "acomp": True, "pcomp": True, "intj": True, "relcl": True, "npadvmod": True, "case": True, "attr": True, "dep": True, "appos": True, "det": True, "nmod": True, "dobj": True, "dative": True, "pobj": True, "iobj": True, "expl": True, "predet": True, "preconj": True, "oprd": True}, "4": {"ROOT": True}}, "seed": 0, "features": "basic", "beam_width": 1}
+        data_dir = English.default_data_dir()
+        vocab = Vocab.from_dir(path.join(data_dir, 'vocab'))
+
+        moves = ArcEager(vocab.strings, config_data['labels'])
+        templates = get_templates(config_data['features'])
+
+        model = Model(moves.n_moves, templates, path.join(data_dir, 'deps'))
+
+        parser = Parser(vocab.strings, moves, model)
+
+    def test_thinc_load(self):
+        # Loads the 'deps' model file straight into thinc's LinearModel, without spaCy's Model.
+        data_dir = English.default_data_dir()
+        model_loc = path.join(data_dir, 'deps', 'model')
+        # 92: n classes, i.e. moves.n_moves in test_load_careful above (hard-coded; TODO confirm).
+        # 116: n features, i.e. len(templates) + 1 in test_load_careful above (TODO confirm).
+        model = LinearModel(92, 116)
+        model.load(model_loc)
+
 
 if __name__ == '__main__':
     unittest.main()

From 3b79d6746250cf2afefe8e1d6e573ffb16617015 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal@gmail.com>
Date: Mon, 12 Oct 2015 00:48:18 +1100
Subject: [PATCH 6/8] * Fix assertion in test_basic_create

---
 tests/test_basic_create.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_basic_create.py b/tests/test_basic_create.py
index d31a35bed..900a7bc64 100644
--- a/tests/test_basic_create.py
+++ b/tests/test_basic_create.py
@@ -47,7 +47,7 @@ class TestVocab(unittest.TestCase):
     def test_get_lexeme(self):
         vocab = Vocab()
         lexeme = vocab[u'Hello']
-        assert lexeme.orth_ == u'Hello'
+        self.assertEqual(lexeme.orth_, u'Hello')
 
 
 class TestTokenizer(unittest.TestCase):

From e886e6a4064249f953f90aee6b7656bdc91715cd Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal@gmail.com>
Date: Tue, 13 Oct 2015 13:46:17 +1100
Subject: [PATCH 7/8] * Inc version

---
 setup.py             | 2 +-
 spacy/en/download.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/setup.py b/setup.py
index 3036db94c..1da39c2cf 100644
--- a/setup.py
+++ b/setup.py
@@ -134,7 +134,7 @@ def run_setup(exts):
     headers_workaround.install_headers('numpy')
 
 
-VERSION = '0.94'
+VERSION = '0.95'
 def main(modules, is_pypy):
     language = "cpp"
     includes = ['.', path.join(sys.prefix, 'include')]
diff --git a/spacy/en/download.py b/spacy/en/download.py
index 01c87a4e4..91f31565b 100644
--- a/spacy/en/download.py
+++ b/spacy/en/download.py
@@ -7,7 +7,7 @@ import wget
 import plac
 
 # TODO: Read this from the same source as the setup
-VERSION = '0.9.1'
+VERSION = '0.9.5'
 
 AWS_STORE = 'https://s3-us-west-1.amazonaws.com/media.spacynlp.com'
 

From 6c2da06c18c015b562eb8975fb7c7c26c51c0bfa Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal@gmail.com>
Date: Tue, 13 Oct 2015 13:52:10 +1100
Subject: [PATCH 8/8] * Package tag_map.json

---
 setup.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/setup.py b/setup.py
index 1da39c2cf..5bc9b956f 100644
--- a/setup.py
+++ b/setup.py
@@ -92,6 +92,7 @@ def cython_setup(mod_names, language, includes, compile_args, link_args):
         package_data={"spacy": ["*.pxd"],
                       "spacy.en": ["*.pxd", "data/pos/*",
                                    "data/wordnet/*", "data/tokenizer/*",
+                                   "data/vocab/tag_map.json",
                                    "data/vocab/lexemes.bin",
                                    "data/vocab/strings.txt"],
                       "spacy.syntax": ["*.pxd"]},