From 5ca1646d8af090283cdc3f832af4a24141f5a952 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Mon, 28 Sep 2015 18:07:11 +1000
Subject: [PATCH 1/3] * Mark model-requiring tests

---
 website/tests/test_home.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/website/tests/test_home.py b/website/tests/test_home.py
index ed710e107..3b27ba2f7 100644
--- a/website/tests/test_home.py
+++ b/website/tests/test_home.py
@@ -24,6 +24,7 @@ def test_load_resources_and_process_text():
     doc = nlp('Hello, world. Here are two sentences.')
 
 
+@pytest.mark.models
 def test_get_tokens_and_sentences(doc):
     token = doc[0]
     sentence = doc.sents.next()
@@ -65,6 +66,7 @@ def test_export_to_numpy_arrays(nlp, doc):
     assert list(doc_array[:, 1]) == [t.like_url for t in doc]
 
 
+@pytest.mark.models
 def test_word_vectors(nlp):
     doc = nlp("Apples and oranges are similar. Boots and hippos aren't.")
 
@@ -96,6 +98,7 @@ def test_part_of_speech_tags(nlp):
         print(token.tag_)
 
 
+@pytest.mark.models
 def test_syntactic_dependencies():
     def dependency_labels_to_root(token):
         '''Walk up the syntactic tree, collecting the arc labels.'''
@@ -106,6 +109,7 @@ def test_syntactic_dependencies():
         return dep_labels
 
 
+@pytest.mark.models
 def test_named_entities():
     def iter_products(docs):
         for doc in docs:
@@ -151,6 +155,7 @@ def test_calculate_inline_mark_up_on_original_string():
     return string
 
 
+@pytest.mark.models
 def test_efficient_binary_serialization(doc):
     byte_string = doc.as_bytes()
     open('/tmp/moby_dick.bin', 'wb').write(byte_string)

From d8276b3792960cbbafe6718fcc2d307a2292532e Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Mon, 28 Sep 2015 19:34:34 +1000
Subject: [PATCH 2/3] * Move test_home to within tests/

---
 {website/tests => tests/website}/test_home.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)
 rename {website/tests => tests/website}/test_home.py (95%)

diff --git a/website/tests/test_home.py b/tests/website/test_home.py
similarity index 95%
rename from website/tests/test_home.py
rename to tests/website/test_home.py
index 3b27ba2f7..8c1ab9b8e 100644
--- a/website/tests/test_home.py
+++ b/tests/website/test_home.py
@@ -1,5 +1,6 @@
 from __future__ import unicode_literals
 import pytest
+import spacy.en
 
 
 @pytest.fixture(scope="session")
@@ -36,7 +37,7 @@ def test_use_integer_ids_for_any_strings(nlp, token):
     hello_id = nlp.vocab.strings['Hello']
     hello_str = nlp.vocab.strings[hello_id]
 
-    assert token.orth == hello_id == 469755
+    assert token.orth == hello_id == 3404
     assert token.orth_ == hello_str == 'Hello'
 
 
@@ -71,7 +72,7 @@ def test_word_vectors(nlp):
     doc = nlp("Apples and oranges are similar. Boots and hippos aren't.")
 
     apples = doc[0]
-    oranges = doc[1]
+    oranges = doc[2]
     boots = doc[6]
     hippos = doc[8]
 
@@ -157,10 +158,12 @@ def test_calculate_inline_mark_up_on_original_string():
 
 
 @pytest.mark.models
 def test_efficient_binary_serialization(doc):
-    byte_string = doc.as_bytes()
+    from spacy.tokens.doc import Doc
+
+    byte_string = doc.to_bytes()
     open('/tmp/moby_dick.bin', 'wb').write(byte_string)
     nlp = spacy.en.English()
-    for byte_string in Doc.read(open('/tmp/moby_dick.bin', 'rb')):
+    for byte_string in Doc.read_bytes(open('/tmp/moby_dick.bin', 'rb')):
         doc = Doc(nlp.vocab)
         doc.from_bytes(byte_string)

From c03e74272b999a45ea7e46ed7f0ad69409e56a01 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Mon, 28 Sep 2015 21:54:44 +1000
Subject: [PATCH 3/3] * Remove extraneous file.

---
 dev_setup.py | 137 ---------------------------------------------------
 1 file changed, 137 deletions(-)
 delete mode 100644 dev_setup.py

diff --git a/dev_setup.py b/dev_setup.py
deleted file mode 100644
index 8efaba40b..000000000
--- a/dev_setup.py
+++ /dev/null
@@ -1,137 +0,0 @@
-#!/usr/bin/env python
-
-import subprocess
-
-# This is what we're down to...
-try:
-    import Cython
-except ImportError:
-    subprocess.call(['pip install cython'], shell=True)
-
-try:
-    import murmurhash
-except ImportError:
-    subprocess.call(['pip install murmurhash'], shell=True)
-
-try:
-    import cymem
-except ImportError:
-    subprocess.call(['pip install cymem'], shell=True)
-
-try:
-    import preshed
-except ImportError:
-    subprocess.call(['pip install preshed'], shell=True)
-
-try:
-    import thinc
-except ImportError:
-    subprocess.call(['pip install thinc'], shell=True)
-
-try:
-    import numpy
-except ImportError:
-    subprocess.call(['pip install numpy'], shell=True)
-
-
-import Cython.Distutils
-from Cython.Distutils import Extension
-import distutils.core
-
-import sys
-import os
-import os.path
-
-from os import path
-from glob import glob
-
-import numpy
-
-
-def clean(ext):
-    for pyx in ext.sources:
-        if pyx.endswith('.pyx'):
-            c = pyx[:-4] + '.c'
-            cpp = pyx[:-4] + '.cpp'
-            so = pyx[:-4] + '.so'
-            html = pyx[:-4] + '.html'
-            if os.path.exists(so):
-                os.unlink(so)
-            if os.path.exists(c):
-                os.unlink(c)
-            elif os.path.exists(cpp):
-                os.unlink(cpp)
-            if os.path.exists(html):
-                os.unlink(html)
-
-HERE = os.path.dirname(__file__)
-virtual_env = os.environ.get('VIRTUAL_ENV', '')
-compile_args = []
-link_args = []
-libs = []
-
-includes = ['.', numpy.get_include()]
-cython_includes = ['.']
-
-
-if 'VIRTUAL_ENV' in os.environ:
-    includes += glob(path.join(os.environ['VIRTUAL_ENV'], 'include', 'site', '*'))
-else:
-    # If you're not using virtualenv, set your include dir here.
-    pass
-
-ext_args = {'language': "c++", "include_dirs": includes}
-
-exts = [
-    Extension("spacy.typedefs", ["spacy/typedefs.pyx"], **ext_args),
-    Extension("spacy.strings", ["spacy/strings.pyx"], **ext_args),
-    Extension("spacy.lexeme", ["spacy/lexeme.pyx"], **ext_args),
-    Extension("spacy.vocab", ["spacy/vocab.pyx"], **ext_args),
-    Extension("spacy.tokens", ["spacy/tokens.pyx"], **ext_args),
-    Extension("spacy.morphology", ["spacy/morphology.pyx"], **ext_args),
-
-    Extension("spacy._ml", ["spacy/_ml.pyx"], **ext_args),
-
-    Extension("spacy.tokenizer", ["spacy/tokenizer.pyx"], **ext_args),
-    Extension("spacy.en.attrs", ["spacy/en/attrs.pyx"], **ext_args),
-    Extension("spacy.en.pos", ["spacy/en/pos.pyx"], **ext_args),
-    Extension("spacy.syntax.parser", ["spacy/syntax/parser.pyx"], **ext_args),
-    Extension("spacy.syntax._state", ["spacy/syntax/_state.pyx"], **ext_args),
-    Extension("spacy.syntax.arc_eager", ["spacy/syntax/arc_eager.pyx"], **ext_args),
-    Extension("spacy.syntax._parse_features", ["spacy/syntax/_parse_features.pyx"],
-              **ext_args)
-
-    #Extension("spacy.pos_feats", ["spacy/pos_feats.pyx"], language="c++", include_dirs=includes),
-    #Extension("spacy.ner._state", ["spacy/ner/_state.pyx"], language="c++", include_dirs=includes),
-    #Extension("spacy.ner.bilou_moves", ["spacy/ner/bilou_moves.pyx"], language="c++", include_dirs=includes),
-    #Extension("spacy.ner.io_moves", ["spacy/ner/io_moves.pyx"], language="c++", include_dirs=includes),
-    #Extension("spacy.ner.greedy_parser", ["spacy/ner/greedy_parser.pyx"], language="c++", include_dirs=includes),
-    #Extension("spacy.ner.pystate", ["spacy/ner/pystate.pyx"], language="c++", include_dirs=includes),
-    #Extension("spacy.ner.context", ["spacy/ner/context.pyx"], language="c++", include_dirs=includes),
-    #Extension("spacy.ner.feats", ["spacy/ner/feats.pyx"], language="c++", include_dirs=includes),
-    #Extension("spacy.ner.annot", ["spacy/ner/annot.pyx"], language="c++", include_dirs=includes),
-]
-
-
-if sys.argv[1] == 'clean':
-    print >> sys.stderr, "cleaning .c, .c++ and .so files matching sources"
-    map(clean, exts)
-
-distutils.core.setup(
-    name='spacy',
-    packages=['spacy', 'spacy.en', 'spacy.syntax'],
-    description="Industrial-strength NLP",
-    author='Matthew Honnibal',
-    author_email='honnibal@gmail.com',
-    version='0.1',
-    url="http://honnibal.github.io/spaCy/",
-    package_data={"spacy": ["*.pxd"], "spacy.en": ["*.pxd", "data/pos/*",
-                                                   "data/wordnet/*", "data/tokenizer/*",
-                                                   "data/vocab/*"],
-                  "spacy.syntax": ["*.pxd"]},
-    cmdclass={'build_ext': Cython.Distutils.build_ext},
-    ext_modules=exts,
-    license="Dual: Commercial or AGPL",
-    requires=['cython', 'murmurhash', 'cymem', 'preshed', 'thinc', "unidecode",
-              "ujson"]
-)
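
A note on the first patch: the tests that need spaCy's statistical model data now carry the pytest marker "models", so they can be deselected on machines without that data. The conftest.py sketch below is an assumption added for illustration, not part of these commits; it only shows how such a marker is conventionally registered.

# conftest.py (hypothetical) -- register the "models" marker used in
# tests/website/test_home.py so pytest recognises it during "-m" filtering.
def pytest_configure(config):
    config.addinivalue_line(
        "markers", "models: test requires the installed spaCy model data")

With a marker like this, running "py.test tests/ -m 'not models'" skips the marked tests and "py.test tests/ -m models" runs only them; the "-m" selection is standard pytest behaviour and does not depend on anything in these patches.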