From 7f579ae834398b2045bd19e9032e82ee425739c6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rapha=C3=ABl=20Bournhonesque?= <raphael@bournhonesque.eu>
Date: Sun, 19 Mar 2017 11:40:29 +0100
Subject: [PATCH 01/30] Remove duplicate keys in [en|fi] data dicts

---
 spacy/en/morph_rules.py          | 1 -
 spacy/fi/tokenizer_exceptions.py | 3 ---
 2 files changed, 4 deletions(-)

diff --git a/spacy/en/morph_rules.py b/spacy/en/morph_rules.py
index 2b8aae823..51a50736e 100644
--- a/spacy/en/morph_rules.py
+++ b/spacy/en/morph_rules.py
@@ -21,7 +21,6 @@ MORPH_RULES = {
         "them":         {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "Three", "Number": "Plur", "Case": "Acc"},
 
         "mine":         {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "One", "Number": "Sing", "Poss": "Yes", "Reflex": "Yes"},
-        "yours":        {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "Two", "Poss": "Yes", "Reflex": "Yes"},
         "his":          {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": "Masc", "Poss": "Yes", "Reflex": "Yes"},
         "hers":         {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": "Fem",  "Poss": "Yes", "Reflex": "Yes"},
         "its":          {LEMMA: PRON_LEMMA, "PronType": "Prs", "Person": "Three", "Number": "Sing", "Gender": "Neut", "Poss": "Yes", "Reflex": "Yes"},
diff --git a/spacy/fi/tokenizer_exceptions.py b/spacy/fi/tokenizer_exceptions.py
index 52ea7428a..09775a2f4 100644
--- a/spacy/fi/tokenizer_exceptions.py
+++ b/spacy/fi/tokenizer_exceptions.py
@@ -193,9 +193,6 @@ TOKENIZER_EXCEPTIONS = {
     "vm.": [
         {ORTH: "vm.", LEMMA: "viimeksi mainittu"}
     ],
-    "siht.": [
-        {ORTH: "siht.", LEMMA: "sihteeri"}
-    ],
     "srk.": [
         {ORTH: "srk.", LEMMA: "seurakunta"}
     ]

From 81b28ca606bb956b74849bf7971cb0b381431887 Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Mon, 20 Mar 2017 18:01:51 +0100
Subject: [PATCH 02/30] Update models docs with info on retraining own models

---
 website/docs/usage/models.jade | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/website/docs/usage/models.jade b/website/docs/usage/models.jade
index d45d8d45e..ae1417a29 100644
--- a/website/docs/usage/models.jade
+++ b/website/docs/usage/models.jade
@@ -14,9 +14,12 @@ p
     |  model name.
 
 +infobox("Important note")
-    |  Due to improvements in the English lemmatizer in v1.7.0, you need to download the
-    |  new English model. The German model is still compatible and will be
-    |  recognised and linked automatically.
+    |  Due to improvements in the English lemmatizer in v1.7.0, you need to
+    |  #[strong download the new English models]. The German model is still
+    |  compatible. If you've trained statistical models that use spaCy's
+    |  annotations, you should #[strong retrain your models after updating spaCy].
+    |  If you don't retrain your models, you may suffer train/test skew, which
+    |  might decrease your accuracy.
 
 +aside-code("Quickstart").
     # Install spaCy and download English model

From adbcac65918137634e080e3e4689c3456cde593b Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Mon, 20 Mar 2017 22:48:21 +0100
Subject: [PATCH 03/30] Fix spacing

---
 spacy/util.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/spacy/util.py b/spacy/util.py
index 49c51b436..d1252e41d 100644
--- a/spacy/util.py
+++ b/spacy/util.py
@@ -13,6 +13,7 @@ import textwrap
 
 from .attrs import TAG, HEAD, DEP, ENT_IOB, ENT_TYPE
 
+
 try:
     basestring
 except NameError:

From a6c036180344899471c715ad5798b78c116fdee5 Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Mon, 20 Mar 2017 22:48:32 +0100
Subject: [PATCH 04/30] Handle raw_input vs input in Python 2 and 3

---
 spacy/util.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/spacy/util.py b/spacy/util.py
index d1252e41d..f8fc76b05 100644
--- a/spacy/util.py
+++ b/spacy/util.py
@@ -20,6 +20,12 @@ except NameError:
     basestring = str
 
 
+try:
+    raw_input
+except NameError: # Python 3
+    raw_input = input
+
+
 LANGUAGES = {}
 _data_path = pathlib.Path(__file__).parent / 'data'
 

From 5aea327a5b5a4fe74b023d6ecad0689a41ee895f Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Mon, 20 Mar 2017 22:48:56 +0100
Subject: [PATCH 05/30] Add util function to get raw user input

---
 spacy/util.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/spacy/util.py b/spacy/util.py
index f8fc76b05..1f1cdbb6e 100644
--- a/spacy/util.py
+++ b/spacy/util.py
@@ -168,6 +168,17 @@ def parse_package_meta(package_path, package, require=True):
         return None
 
 
+def get_raw_input(description, default=False):
+    """Get user input via raw_input / input and return input value. Takes a
+    description for the prompt, and an optional default value that's displayed
+    with the prompt."""
+
+    additional = ' (default: {d})'.format(d=default) if default else ''
+    prompt = '    {d}{a}: '.format(d=description, a=additional)
+    user_input = raw_input(prompt)
+    return user_input
+
+
 def print_table(data, **kwargs):
     """Print data in table format. Can either take a list of tuples or a
     dictionary, which will be converted to a list of tuples."""

From a54e3c2efe6d388b1cb2fa0bc9fc867165c9025d Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Mon, 20 Mar 2017 22:49:36 +0100
Subject: [PATCH 06/30] Remove empty line

---
 spacy/__main__.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/spacy/__main__.py b/spacy/__main__.py
index 9addbccde..cf740c8fe 100644
--- a/spacy/__main__.py
+++ b/spacy/__main__.py
@@ -1,5 +1,4 @@
 # coding: utf8
-# 
 from __future__ import print_function
 # NB! This breaks in plac on Python 2!!
 #from __future__ import unicode_literals,

From bf240132d70b497e6c5c57407e5ca1cfdc9b17e3 Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Mon, 20 Mar 2017 22:50:13 +0100
Subject: [PATCH 07/30] Add cli.package command to build model packages

---
 spacy/__main__.py     |  17 ++++-
 spacy/cli/__init__.py |   1 +
 spacy/cli/package.py  | 149 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 166 insertions(+), 1 deletion(-)
 create mode 100644 spacy/cli/package.py

diff --git a/spacy/__main__.py b/spacy/__main__.py
index cf740c8fe..ba34c478f 100644
--- a/spacy/__main__.py
+++ b/spacy/__main__.py
@@ -7,12 +7,13 @@ import plac
 from spacy.cli import download as cli_download
 from spacy.cli import link as cli_link
 from spacy.cli import info as cli_info
+from spacy.cli import package as cli_package
 
 
 class CLI(object):
     """Command-line interface for spaCy"""
 
-    commands = ('download', 'link', 'info')
+    commands = ('download', 'link', 'info', 'package')
 
     @plac.annotations(
         model=("model to download (shortcut or model name)", "positional", None, str),
@@ -58,6 +59,20 @@ class CLI(object):
         cli_info(model, markdown)
 
 
+    @plac.annotations(
+        input_dir=("directory with model data", "positional", None, str),
+        output_dir=("output directory", "positional", None, str)
+    )
+    def package(self, input_dir, output_dir):
+        """
+        Generate Python package for model data, including meta and required
+        installation files. A new directory will be created in the specified
+        output directory.
+        """
+
+        cli_package(input_dir, output_dir)
+
+
     def __missing__(self, name):
         print("\n   Command %r does not exist\n" % name)
 
diff --git a/spacy/cli/__init__.py b/spacy/cli/__init__.py
index 2c45b471a..2383e04b9 100644
--- a/spacy/cli/__init__.py
+++ b/spacy/cli/__init__.py
@@ -1,3 +1,4 @@
 from .download import download
 from .info import info
 from .link import link
+from .package import package
diff --git a/spacy/cli/package.py b/spacy/cli/package.py
new file mode 100644
index 000000000..9d1ff7183
--- /dev/null
+++ b/spacy/cli/package.py
@@ -0,0 +1,149 @@
+# coding: utf8
+from __future__ import unicode_literals
+
+import json
+from shutil import copytree
+from pathlib import Path
+
+from .. import about
+from .. import util
+
+
+def package(input_dir, output_dir):
+    input_path = Path(input_dir)
+    output_path = Path(output_dir)
+    check_dirs(input_path, output_path)
+
+    meta = generate_meta()
+    model_name = meta['lang'] + '_' + meta['name']
+    model_name_v = model_name + '-' + meta['version']
+    main_path = output_path / model_name_v
+    package_path = main_path / model_name
+
+    Path.mkdir(package_path, parents=True)
+    copytree(input_path, package_path / model_name_v)
+    create_file(main_path / 'meta.json', json.dumps(meta, indent=2))
+    create_file(main_path / 'setup.py', TEMPLATE_SETUP.strip())
+    create_file(main_path / 'MANIFEST.in', TEMPLATE_MANIFEST.strip())
+    create_file(package_path / '__init__.py', TEMPLATE_INIT.strip())
+
+    util.print_msg(
+        main_path.as_posix(),
+        "To build the package, run python setup.py sdist in that directory.",
+        title="Successfully reated package {p}".format(p=model_name_v))
+
+
+def check_dirs(input_path, output_path):
+    if not input_path.exists():
+        util.sys_exit(input_path.as_poisx(), title="Model directory not found")
+    if not output_path.exists():
+        util.sys_exit(output_path.as_posix(), title="Output directory not found")
+
+
+def create_file(file_path, contents):
+    file_path.touch()
+    file_path.write_text(contents, encoding='utf-8')
+
+
+def generate_meta():
+    settings = [('lang', 'Model language', 'en'),
+                ('name', 'Model name', 'model'),
+                ('version', 'Model version', '0.0.0'),
+                ('spacy_version', 'Required spaCy version', '>=2.0.0,<3.0.0'),
+                ('description', 'Model description', False),
+                ('author', 'Author', False),
+                ('email', 'Author email', False),
+                ('url', 'Author website', False),
+                ('license', 'License', 'MIT')]
+
+    util.print_msg("Enter the package settings for your model.", title="Generating meta.json")
+
+    meta = {}
+    for setting, desc, default in settings:
+        response = util.get_raw_input(desc, default)
+        meta[setting] = default if response == '' and default else response
+    return meta
+
+
+TEMPLATE_MANIFEST = """
+include meta.json
+"""
+
+
+TEMPLATE_SETUP = """
+#!/usr/bin/env python
+# coding: utf8
+from __future__ import unicode_literals
+
+import io
+import json
+from os import path, walk
+from shutil import copy
+from setuptools import setup
+
+
+def load_meta(fp):
+    with io.open(fp, encoding='utf8') as f:
+        return json.load(f)
+
+
+def list_files(data_dir):
+    output = []
+    for root, _, filenames in walk(data_dir):
+        for filename in filenames:
+            if not filename.startswith('.'):
+                output.append(path.join(root, filename))
+    output = [path.relpath(p, path.dirname(data_dir)) for p in output]
+    output.append('meta.json')
+    return output
+
+
+def setup_package():
+    root = path.abspath(path.dirname(__file__))
+    meta_path = path.join(root, 'meta.json')
+    meta = load_meta(meta_path)
+    model_name = str(meta['lang'] + '_' + meta['name'])
+    model_dir = path.join(model_name, model_name + '-' + meta['version'])
+
+    copy(meta_path, path.join(root, model_name))
+    copy(meta_path, path.join(root, model_dir))
+
+    setup(
+        name=model_name,
+        description=meta['description'],
+        author=meta['author'],
+        author_email=meta['email'],
+        url=meta['url'],
+        version=meta['version'],
+        license=meta['license'],
+        packages=[model_name],
+        package_data={model_name: list_files(model_dir)},
+        install_requires=['spacy' + meta['spacy_version']],
+        zip_safe=False,
+    )
+
+
+if __name__ == '__main__':
+    setup_package()
+"""
+
+
+TEMPLATE_INIT = """
+from pathlib import Path
+from spacy.util import get_lang_class
+import pkg_resources
+import json
+
+
+def load_meta():
+    with (Path(__file__).parent / 'meta.json').open() as f:
+        return json.load(f)
+
+
+def load(**kwargs):
+    meta = load_meta()
+    version = meta['version']
+    data_dir = pkg_resources.resource_filename(__name__, __name__ + '-' + version)
+    lang = get_lang_class(meta['lang'])
+    return lang(path=Path(data_dir), **kwargs)
+"""

From b2bcdec0f608dcf64147e94f1547b267baf007b6 Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Mon, 20 Mar 2017 22:50:55 +0100
Subject: [PATCH 08/30] Update docstring

---
 spacy/__main__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/__main__.py b/spacy/__main__.py
index ba34c478f..23d87acb3 100644
--- a/spacy/__main__.py
+++ b/spacy/__main__.py
@@ -67,7 +67,7 @@ class CLI(object):
         """
         Generate Python package for model data, including meta and required
         installation files. A new directory will be created in the specified
-        output directory.
+        output directory, and model data will be copied over.
         """
 
         cli_package(input_dir, output_dir)

From 8eb9a2b35503a54ac7d3aca403e4d48516a8900b Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Tue, 21 Mar 2017 02:05:14 +0100
Subject: [PATCH 09/30] Fix formatting

---
 spacy/__main__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/spacy/__main__.py b/spacy/__main__.py
index 23d87acb3..e539ed78d 100644
--- a/spacy/__main__.py
+++ b/spacy/__main__.py
@@ -32,8 +32,8 @@ class CLI(object):
 
     @plac.annotations(
         origin=("package name or local path to model", "positional", None, str),
-        link_name=("Name of shortuct link to create", "positional", None, str),
-        force=("Force overwriting of existing link", "flag", "f", bool)
+        link_name=("name of shortuct link to create", "positional", None, str),
+        force=("force overwriting of existing link", "flag", "f", bool)
     )
     def link(self, origin, link_name, force=False):
         """

From 448a916d0d3e3af2d5fe161cd5ed5828e6517ab7 Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Tue, 21 Mar 2017 02:05:34 +0100
Subject: [PATCH 10/30] Add --force option to override directory

---
 spacy/__main__.py    | 7 ++++---
 spacy/cli/package.py | 2 +-
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/spacy/__main__.py b/spacy/__main__.py
index e539ed78d..a5ba66fee 100644
--- a/spacy/__main__.py
+++ b/spacy/__main__.py
@@ -61,16 +61,17 @@ class CLI(object):
 
     @plac.annotations(
         input_dir=("directory with model data", "positional", None, str),
-        output_dir=("output directory", "positional", None, str)
+        output_dir=("output directory", "positional", None, str),
+        force=("force overwriting of existing output directory", "flag", "f", bool)
     )
-    def package(self, input_dir, output_dir):
+    def package(self, input_dir, output_dir, force=False):
         """
         Generate Python package for model data, including meta and required
         installation files. A new directory will be created in the specified
         output directory, and model data will be copied over.
         """
 
-        cli_package(input_dir, output_dir)
+        cli_package(input_dir, output_dir, force)
 
 
     def __missing__(self, name):
diff --git a/spacy/cli/package.py b/spacy/cli/package.py
index 9d1ff7183..abd3f6e4e 100644
--- a/spacy/cli/package.py
+++ b/spacy/cli/package.py
@@ -9,7 +9,7 @@ from .. import about
 from .. import util
 
 
-def package(input_dir, output_dir):
+def package(input_dir, output_dir, force):
     input_path = Path(input_dir)
     output_path = Path(output_dir)
     check_dirs(input_path, output_path)

From 64e38f304e1f9374e3b731e3721434fe33867bb4 Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Tue, 21 Mar 2017 02:06:29 +0100
Subject: [PATCH 11/30] Only import shutil

---
 spacy/cli/package.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/spacy/cli/package.py b/spacy/cli/package.py
index abd3f6e4e..d23b03821 100644
--- a/spacy/cli/package.py
+++ b/spacy/cli/package.py
@@ -2,7 +2,7 @@
 from __future__ import unicode_literals
 
 import json
-from shutil import copytree
+import shutil
 from pathlib import Path
 
 from .. import about
@@ -21,7 +21,7 @@ def package(input_dir, output_dir, force):
     package_path = main_path / model_name
 
     Path.mkdir(package_path, parents=True)
-    copytree(input_path, package_path / model_name_v)
+    shutil.copytree(input_path, package_path / model_name_v)
     create_file(main_path / 'meta.json', json.dumps(meta, indent=2))
     create_file(main_path / 'setup.py', TEMPLATE_SETUP.strip())
     create_file(main_path / 'MANIFEST.in', TEMPLATE_MANIFEST.strip())

From 46bc3c36b00dfb95f90da5769181d0255f860cd8 Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Tue, 21 Mar 2017 02:06:37 +0100
Subject: [PATCH 12/30] Fix typo

---
 spacy/cli/package.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/cli/package.py b/spacy/cli/package.py
index d23b03821..bf424e075 100644
--- a/spacy/cli/package.py
+++ b/spacy/cli/package.py
@@ -30,7 +30,7 @@ def package(input_dir, output_dir, force):
     util.print_msg(
         main_path.as_posix(),
         "To build the package, run python setup.py sdist in that directory.",
-        title="Successfully reated package {p}".format(p=model_name_v))
+        title="Successfully created package {p}".format(p=model_name_v))
 
 
 def check_dirs(input_path, output_path):

From 5230ed5b98e3f1e9c83b1205c87db962c5844804 Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Tue, 21 Mar 2017 02:06:53 +0100
Subject: [PATCH 13/30] Move directory check and overwriting/creating dirs to
 own function

---
 spacy/cli/package.py | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/spacy/cli/package.py b/spacy/cli/package.py
index bf424e075..59b45ab5f 100644
--- a/spacy/cli/package.py
+++ b/spacy/cli/package.py
@@ -20,7 +20,7 @@ def package(input_dir, output_dir, force):
     main_path = output_path / model_name_v
     package_path = main_path / model_name
 
-    Path.mkdir(package_path, parents=True)
+    create_dirs(package_path, force)
     shutil.copytree(input_path, package_path / model_name_v)
     create_file(main_path / 'meta.json', json.dumps(meta, indent=2))
     create_file(main_path / 'setup.py', TEMPLATE_SETUP.strip())
@@ -40,6 +40,17 @@ def check_dirs(input_path, output_path):
         util.sys_exit(output_path.as_posix(), title="Output directory not found")
 
 
+def create_dirs(package_path, force):
+    if package_path.exists():
+        if force:
+            shutil.rmtree(package_path)
+        else:
+            util.sys_exit(package_path.as_posix(),
+                "Please delete the directory and try again.",
+                title="Package directory already exists")
+    Path.mkdir(package_path, parents=True)
+
+
 def create_file(file_path, contents):
     file_path.touch()
     file_path.write_text(contents, encoding='utf-8')

From 3f4e3fda1d21a90b5d7b3e3fe70e650120b19c84 Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Tue, 21 Mar 2017 11:17:36 +0100
Subject: [PATCH 14/30] Update command and fetch file templates from GitHub

While the feature is still experimental, this allows the template files to be
modified without having to ship a new version of spaCy.
---
 spacy/cli/package.py | 105 ++++++++-----------------------------------
 1 file changed, 18 insertions(+), 87 deletions(-)

diff --git a/spacy/cli/package.py b/spacy/cli/package.py
index 59b45ab5f..6a0f36ff9 100644
--- a/spacy/cli/package.py
+++ b/spacy/cli/package.py
@@ -3,6 +3,7 @@ from __future__ import unicode_literals
 
 import json
 import shutil
+import requests
 from pathlib import Path
 
 from .. import about
@@ -14,7 +15,11 @@ def package(input_dir, output_dir, force):
     output_path = Path(output_dir)
     check_dirs(input_path, output_path)
 
+    template_setup = get_template('setup.py')
+    template_init = get_template('en_model_name/__init__.py')
+    template_manifest = 'include meta.json'
     meta = generate_meta()
+
     model_name = meta['lang'] + '_' + meta['name']
     model_name_v = model_name + '-' + meta['version']
     main_path = output_path / model_name_v
@@ -23,13 +28,13 @@ def package(input_dir, output_dir, force):
     create_dirs(package_path, force)
     shutil.copytree(input_path, package_path / model_name_v)
     create_file(main_path / 'meta.json', json.dumps(meta, indent=2))
-    create_file(main_path / 'setup.py', TEMPLATE_SETUP.strip())
-    create_file(main_path / 'MANIFEST.in', TEMPLATE_MANIFEST.strip())
-    create_file(package_path / '__init__.py', TEMPLATE_INIT.strip())
+    create_file(main_path / 'setup.py', template_setup)
+    create_file(main_path / 'MANIFEST.in', template_manifest)
+    create_file(package_path / '__init__.py', template_init)
 
     util.print_msg(
         main_path.as_posix(),
-        "To build the package, run python setup.py sdist in that directory.",
+        "To build the package, run `python setup.py sdist` in that directory.",
         title="Successfully created package {p}".format(p=model_name_v))
 
 
@@ -60,7 +65,7 @@ def generate_meta():
     settings = [('lang', 'Model language', 'en'),
                 ('name', 'Model name', 'model'),
                 ('version', 'Model version', '0.0.0'),
-                ('spacy_version', 'Required spaCy version', '>=2.0.0,<3.0.0'),
+                ('spacy_version', 'Required spaCy version', '>=1.7.0,<2.0.0'),
                 ('description', 'Model description', False),
                 ('author', 'Author', False),
                 ('email', 'Author email', False),
@@ -76,85 +81,11 @@ def generate_meta():
     return meta
 
 
-TEMPLATE_MANIFEST = """
-include meta.json
-"""
-
-
-TEMPLATE_SETUP = """
-#!/usr/bin/env python
-# coding: utf8
-from __future__ import unicode_literals
-
-import io
-import json
-from os import path, walk
-from shutil import copy
-from setuptools import setup
-
-
-def load_meta(fp):
-    with io.open(fp, encoding='utf8') as f:
-        return json.load(f)
-
-
-def list_files(data_dir):
-    output = []
-    for root, _, filenames in walk(data_dir):
-        for filename in filenames:
-            if not filename.startswith('.'):
-                output.append(path.join(root, filename))
-    output = [path.relpath(p, path.dirname(data_dir)) for p in output]
-    output.append('meta.json')
-    return output
-
-
-def setup_package():
-    root = path.abspath(path.dirname(__file__))
-    meta_path = path.join(root, 'meta.json')
-    meta = load_meta(meta_path)
-    model_name = str(meta['lang'] + '_' + meta['name'])
-    model_dir = path.join(model_name, model_name + '-' + meta['version'])
-
-    copy(meta_path, path.join(root, model_name))
-    copy(meta_path, path.join(root, model_dir))
-
-    setup(
-        name=model_name,
-        description=meta['description'],
-        author=meta['author'],
-        author_email=meta['email'],
-        url=meta['url'],
-        version=meta['version'],
-        license=meta['license'],
-        packages=[model_name],
-        package_data={model_name: list_files(model_dir)},
-        install_requires=['spacy' + meta['spacy_version']],
-        zip_safe=False,
-    )
-
-
-if __name__ == '__main__':
-    setup_package()
-"""
-
-
-TEMPLATE_INIT = """
-from pathlib import Path
-from spacy.util import get_lang_class
-import pkg_resources
-import json
-
-
-def load_meta():
-    with (Path(__file__).parent / 'meta.json').open() as f:
-        return json.load(f)
-
-
-def load(**kwargs):
-    meta = load_meta()
-    version = meta['version']
-    data_dir = pkg_resources.resource_filename(__name__, __name__ + '-' + version)
-    lang = get_lang_class(meta['lang'])
-    return lang(path=Path(data_dir), **kwargs)
-"""
+def get_template(filepath):
+    url = 'https://raw.githubusercontent.com/explosion/spacy-dev-resources/master/templates/model/'
+    r = requests.get(url + filepath)
+    if r.status_code != 200:
+        util.sys_exit(
+            "Couldn't fetch template files from GitHub.",
+            title="Server error ({c})".format(c=r.status_code))
+    return r.text

From 09b24bc5a9dfd69d2f95c0225599c44170659351 Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Tue, 21 Mar 2017 11:19:21 +0100
Subject: [PATCH 15/30] Add docs for package command

---
 spacy/__main__.py           |  2 +-
 website/docs/usage/cli.jade | 37 +++++++++++++++++++++++++++++++++++++
 2 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/spacy/__main__.py b/spacy/__main__.py
index a5ba66fee..cde146cba 100644
--- a/spacy/__main__.py
+++ b/spacy/__main__.py
@@ -62,7 +62,7 @@ class CLI(object):
     @plac.annotations(
         input_dir=("directory with model data", "positional", None, str),
         output_dir=("output directory", "positional", None, str),
-        force=("force overwriting of existing output directory", "flag", "f", bool)
+        force=("force overwriting of existing folder in output directory", "flag", "f", bool)
     )
     def package(self, input_dir, output_dir, force=False):
         """
diff --git a/website/docs/usage/cli.jade b/website/docs/usage/cli.jade
index 990117542..4a9ba3dd1 100644
--- a/website/docs/usage/cli.jade
+++ b/website/docs/usage/cli.jade
@@ -103,3 +103,40 @@ p
         +cell #[code --help], #[code -h]
         +cell flag
         +cell Show help message and available arguments.
+
++h(2, "package") Package
+    +tag experimental
+
+p
+    |  Generate a #[+a("/docs/usage/models#own-models") model Python package]
+    |  from an existing model data directory. All data files are copied over,
+    |  and the meta data can be entered directly from the command line. While
+    |  this feature is still experimental, the templates for the
+    |  #[+src(gh("spacy-dev-resources", "templates/model/setup.py")) setup.py] and
+    |  #[+src(gh("spacy-dev-resources", "templates/model/en_morel_name/__init__.py")) __init__.py]
+    |  are downloaded from GitHub. This means you need to be connected to the
+    |  internet to use this command.
+
++code(false, "bash").
+    python -m spacy package [input_dir] [output_dir] [--force]
+
++table(["Argument", "Type", "Description"])
+    +row
+        +cell #[code input_dir]
+        +cell positional
+        +cell Path to directory containing model data.
+
+    +row
+        +cell #[code output_dir]
+        +cell positional
+        +cell Directory to create package folder in.
+
+    +row
+        +cell #[code --force], #[code -f]
+        +cell flag
+        +cell Force overwriting of existing folder in output directory.
+
+    +row
+        +cell #[code --help], #[code -h]
+        +cell flag
+        +cell Show help message and available arguments.

From 49bbfdaac1036f052b38f6991ea65de8efc3478f Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Tue, 21 Mar 2017 11:25:01 +0100
Subject: [PATCH 16/30] Add info on CLI to docs on own models

---
 website/docs/usage/models.jade | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/website/docs/usage/models.jade b/website/docs/usage/models.jade
index ae1417a29..39c271df4 100644
--- a/website/docs/usage/models.jade
+++ b/website/docs/usage/models.jade
@@ -238,7 +238,11 @@ p
     |  #[+a("/docs/usage/adding-languages") additional languages], you can
     |  create a shortuct link for it by pointing #[code spacy.link] to the
     |  model's data directory. To allow your model to be downloaded and
-    |  installed via pip, you'll also need to generate a package for it.
+    |  installed via pip, you'll also need to generate a package for it. You can
+    |  do this manually, or via the new
+    |  #[+a("/docs/usage/cli#package") #[code spacy package] command] that will
+    |  create all required files, and walk you through generating the meta data.
+
 
 +infobox("Important note")
     |  The model packages are #[strong not suitable] for the public

From cf0094187e356e9b762a1796b092734d4e30d654 Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Tue, 21 Mar 2017 11:32:38 +0100
Subject: [PATCH 17/30] Fetch MANIFEST.in from GitHub as well

---
 spacy/cli/package.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/cli/package.py b/spacy/cli/package.py
index 6a0f36ff9..5894ec049 100644
--- a/spacy/cli/package.py
+++ b/spacy/cli/package.py
@@ -16,8 +16,8 @@ def package(input_dir, output_dir, force):
     check_dirs(input_path, output_path)
 
     template_setup = get_template('setup.py')
+    template_manifest = get_template('MANIFEST.in')
     template_init = get_template('en_model_name/__init__.py')
-    template_manifest = 'include meta.json'
     meta = generate_meta()
 
     model_name = meta['lang'] + '_' + meta['name']

From fa6e3cefbb482e97d0c21c51d852bcda5f31b089 Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Tue, 21 Mar 2017 11:35:29 +0100
Subject: [PATCH 18/30] Simplify package command docs

---
 website/docs/usage/cli.jade | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/website/docs/usage/cli.jade b/website/docs/usage/cli.jade
index 4a9ba3dd1..66be83923 100644
--- a/website/docs/usage/cli.jade
+++ b/website/docs/usage/cli.jade
@@ -111,11 +111,9 @@ p
     |  Generate a #[+a("/docs/usage/models#own-models") model Python package]
     |  from an existing model data directory. All data files are copied over,
     |  and the meta data can be entered directly from the command line. While
-    |  this feature is still experimental, the templates for the
-    |  #[+src(gh("spacy-dev-resources", "templates/model/setup.py")) setup.py] and
-    |  #[+src(gh("spacy-dev-resources", "templates/model/en_morel_name/__init__.py")) __init__.py]
-    |  are downloaded from GitHub. This means you need to be connected to the
-    |  internet to use this command.
+    |  this feature is still experimental, the required file templates are
+    |  downloaded from #[+src(gh("spacy-dev-resources", "templates/model")) GitHub].
+    |  This means you need to be connected to the internet to use this command.
 
 +code(false, "bash").
     python -m spacy package [input_dir] [output_dir] [--force]

From 3e134b5b2b5fdf7866ce63201be764897861a4b7 Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Tue, 21 Mar 2017 12:15:33 +0100
Subject: [PATCH 19/30] Make sure paths in copytree and rmtree are strings

---
 spacy/cli/package.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/spacy/cli/package.py b/spacy/cli/package.py
index 5894ec049..a5c41adec 100644
--- a/spacy/cli/package.py
+++ b/spacy/cli/package.py
@@ -26,7 +26,7 @@ def package(input_dir, output_dir, force):
     package_path = main_path / model_name
 
     create_dirs(package_path, force)
-    shutil.copytree(input_path, package_path / model_name_v)
+    shutil.copytree((input_path, package_path / model_name_v).as_posix())
     create_file(main_path / 'meta.json', json.dumps(meta, indent=2))
     create_file(main_path / 'setup.py', template_setup)
     create_file(main_path / 'MANIFEST.in', template_manifest)
@@ -48,7 +48,7 @@ def check_dirs(input_path, output_path):
 def create_dirs(package_path, force):
     if package_path.exists():
         if force:
-            shutil.rmtree(package_path)
+            shutil.rmtree(package_path.as_posix())
         else:
             util.sys_exit(package_path.as_posix(),
                 "Please delete the directory and try again.",

From ae466475607540a0b439a808dde6a5d553c6084f Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Tue, 21 Mar 2017 12:21:42 +0100
Subject: [PATCH 20/30] Fix brackets

---
 spacy/cli/package.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/cli/package.py b/spacy/cli/package.py
index a5c41adec..5072e272b 100644
--- a/spacy/cli/package.py
+++ b/spacy/cli/package.py
@@ -26,7 +26,7 @@ def package(input_dir, output_dir, force):
     package_path = main_path / model_name
 
     create_dirs(package_path, force)
-    shutil.copytree((input_path, package_path / model_name_v).as_posix())
+    shutil.copytree(input_path, (package_path / model_name_v).as_posix())
     create_file(main_path / 'meta.json', json.dumps(meta, indent=2))
     create_file(main_path / 'setup.py', template_setup)
     create_file(main_path / 'MANIFEST.in', template_manifest)

From 83a999ea83e7298ce9b999d91087dc8556f57956 Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Tue, 21 Mar 2017 12:24:43 +0100
Subject: [PATCH 21/30] Change default license from MIT to CC

---
 spacy/cli/package.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/cli/package.py b/spacy/cli/package.py
index 5072e272b..e2c8000b3 100644
--- a/spacy/cli/package.py
+++ b/spacy/cli/package.py
@@ -70,7 +70,7 @@ def generate_meta():
                 ('author', 'Author', False),
                 ('email', 'Author email', False),
                 ('url', 'Author website', False),
-                ('license', 'License', 'MIT')]
+                ('license', 'License', 'CC BY-NC 3.0')]
 
     util.print_msg("Enter the package settings for your model.", title="Generating meta.json")
 

From d74aa428ad6137b585ea000bdd8f2dde3c4da03d Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Tue, 21 Mar 2017 12:26:00 +0100
Subject: [PATCH 22/30] Fix path

---
 spacy/cli/package.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/cli/package.py b/spacy/cli/package.py
index e2c8000b3..b4dc76e9a 100644
--- a/spacy/cli/package.py
+++ b/spacy/cli/package.py
@@ -26,7 +26,7 @@ def package(input_dir, output_dir, force):
     package_path = main_path / model_name
 
     create_dirs(package_path, force)
-    shutil.copytree(input_path, (package_path / model_name_v).as_posix())
+    shutil.copytree(input_path.as_posix(), (package_path / model_name_v).as_posix())
     create_file(main_path / 'meta.json', json.dumps(meta, indent=2))
     create_file(main_path / 'setup.py', template_setup)
     create_file(main_path / 'MANIFEST.in', template_manifest)

From c3a9f738960356dc395a789b786aa095bdad8e4f Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Tue, 21 Mar 2017 12:35:22 +0100
Subject: [PATCH 23/30] Fix writing to file

---
 spacy/cli/package.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/spacy/cli/package.py b/spacy/cli/package.py
index b4dc76e9a..5cab2b4bc 100644
--- a/spacy/cli/package.py
+++ b/spacy/cli/package.py
@@ -58,7 +58,7 @@ def create_dirs(package_path, force):
 
 def create_file(file_path, contents):
     file_path.touch()
-    file_path.write_text(contents, encoding='utf-8')
+    file_path.open('w').write(contents, encoding='utf-8')
 
 
 def generate_meta():

From f332bf05be536cfffc80205269116d7fb0a0e363 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rapha=C3=ABl=20Bournhonesque?= <raphael@bournhonesque.eu>
Date: Tue, 21 Mar 2017 21:08:54 +0100
Subject: [PATCH 24/30] Remove unused import statements

---
 spacy/cfile.pyx      | 2 +-
 spacy/gold.pyx       | 4 ----
 spacy/language.py    | 2 --
 spacy/matcher.pyx    | 7 ++-----
 spacy/morphology.pyx | 4 ----
 spacy/pipeline.pyx   | 1 -
 spacy/tagger.pyx     | 6 +-----
 spacy/tokenizer.pyx  | 3 ---
 spacy/util.py        | 3 ---
 9 files changed, 4 insertions(+), 28 deletions(-)

diff --git a/spacy/cfile.pyx b/spacy/cfile.pyx
index ceebe2e59..d5d4bf353 100644
--- a/spacy/cfile.pyx
+++ b/spacy/cfile.pyx
@@ -1,4 +1,4 @@
-from libc.stdio cimport fopen, fclose, fread, fwrite, FILE
+from libc.stdio cimport fopen, fclose, fread, fwrite
 from libc.string cimport memcpy
 
 
diff --git a/spacy/gold.pyx b/spacy/gold.pyx
index 358412fab..471018109 100644
--- a/spacy/gold.pyx
+++ b/spacy/gold.pyx
@@ -1,16 +1,12 @@
 # cython: profile=True
 from __future__ import unicode_literals, print_function
 
-import numpy
 import io
 import json
-import random
 import re
 import os
 from os import path
 
-from libc.string cimport memset
-
 import ujson as json
 
 from .syntax import nonproj
diff --git a/spacy/language.py b/spacy/language.py
index 573bb5a86..4542eae3b 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -1,6 +1,5 @@
 from __future__ import absolute_import
 from __future__ import unicode_literals
-from warnings import warn
 import pathlib
 from contextlib import contextmanager
 import shutil
@@ -33,7 +32,6 @@ from .attrs import TAG, DEP, ENT_IOB, ENT_TYPE, HEAD, PROB, LANG, IS_STOP
 from .syntax.parser import get_templates
 from .syntax.nonproj import PseudoProjectivity
 from .pipeline import DependencyParser, EntityRecognizer
-from .pipeline import BeamDependencyParser, BeamEntityRecognizer
 from .syntax.arc_eager import ArcEager
 from .syntax.ner import BiluoPushDown
 
diff --git a/spacy/matcher.pyx b/spacy/matcher.pyx
index 5c52ae9d0..1883ae89a 100644
--- a/spacy/matcher.pyx
+++ b/spacy/matcher.pyx
@@ -2,13 +2,10 @@
 # cython: infer_types=True
 from __future__ import unicode_literals
 
-from os import path
-
 from .typedefs cimport attr_t
 from .typedefs cimport hash_t
 from .attrs cimport attr_id_t
-from .structs cimport TokenC, LexemeC
-from .lexeme cimport Lexeme
+from .structs cimport TokenC
 
 from cymem.cymem cimport Pool
 from preshed.maps cimport PreshMap
@@ -17,7 +14,7 @@ from libcpp.pair cimport pair
 from murmurhash.mrmr cimport hash64
 from libc.stdint cimport int32_t
 
-from .attrs cimport ID, LENGTH, ENT_TYPE, ORTH, NORM, LEMMA, LOWER, SHAPE
+from .attrs cimport ID, ENT_TYPE
 from . import attrs
 from .tokens.doc cimport get_token_attr
 from .tokens.doc cimport Doc
diff --git a/spacy/morphology.pyx b/spacy/morphology.pyx
index 26405e988..e98ef1e92 100644
--- a/spacy/morphology.pyx
+++ b/spacy/morphology.pyx
@@ -1,12 +1,8 @@
 # cython: infer_types
 from __future__ import unicode_literals
 
-from os import path
-
 from libc.string cimport memset
 
-from .lemmatizer import Lemmatizer
-
 try:
     import ujson as json
 except ImportError:
diff --git a/spacy/pipeline.pyx b/spacy/pipeline.pyx
index 59e1994a9..b2d622329 100644
--- a/spacy/pipeline.pyx
+++ b/spacy/pipeline.pyx
@@ -2,7 +2,6 @@ from .syntax.parser cimport Parser
 from .syntax.beam_parser cimport BeamParser
 from .syntax.ner cimport BiluoPushDown
 from .syntax.arc_eager cimport ArcEager
-from .vocab cimport Vocab
 from .tagger import Tagger
 
 # TODO: The disorganization here is pretty embarrassing. At least it's only
diff --git a/spacy/tagger.pyx b/spacy/tagger.pyx
index 1f6b587c5..4a2ef082a 100644
--- a/spacy/tagger.pyx
+++ b/spacy/tagger.pyx
@@ -1,20 +1,16 @@
 import json
 import pathlib
 from collections import defaultdict
-from libc.string cimport memset
 
 from cymem.cymem cimport Pool
-from thinc.typedefs cimport atom_t, weight_t
+from thinc.typedefs cimport atom_t
 from thinc.extra.eg cimport Example
 from thinc.structs cimport ExampleC
 from thinc.linear.avgtron cimport AveragedPerceptron
 from thinc.linalg cimport VecVec
 
-from .typedefs cimport attr_t
 from .tokens.doc cimport Doc
 from .attrs cimport TAG
-from .parts_of_speech cimport NO_TAG, ADJ, ADV, ADP, CCONJ, DET, NOUN, NUM, PRON
-from .parts_of_speech cimport VERB, X, PUNCT, EOL, SPACE
 from .gold cimport GoldParse
 
 from .attrs cimport *
diff --git a/spacy/tokenizer.pyx b/spacy/tokenizer.pyx
index 5a4eb844a..42f090cde 100644
--- a/spacy/tokenizer.pyx
+++ b/spacy/tokenizer.pyx
@@ -1,13 +1,10 @@
 # cython: embedsignature=True
 from __future__ import unicode_literals
 
-import re
 import pathlib
 
 from cython.operator cimport dereference as deref
 from cython.operator cimport preincrement as preinc
-from cpython cimport Py_UNICODE_ISSPACE
-
 
 try:
     import ujson as json
diff --git a/spacy/util.py b/spacy/util.py
index 49c51b436..b255b92db 100644
--- a/spacy/util.py
+++ b/spacy/util.py
@@ -8,11 +8,8 @@ import os.path
 import pathlib
 import sys
 
-import six
 import textwrap
 
-from .attrs import TAG, HEAD, DEP, ENT_IOB, ENT_TYPE
-
 try:
     basestring
 except NameError:

From 7568cd6bf8a156a37e3c254ea65f5a479102c424 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rapha=C3=ABl=20Bournhonesque?= <raphael@bournhonesque.eu>
Date: Tue, 21 Mar 2017 23:00:13 +0100
Subject: [PATCH 25/30] Split CONLLX file using tabs and not default split
 separators

---
 bin/parser/train_ud.py | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/bin/parser/train_ud.py b/bin/parser/train_ud.py
index c87f40680..98a93dd88 100644
--- a/bin/parser/train_ud.py
+++ b/bin/parser/train_ud.py
@@ -1,18 +1,13 @@
 from __future__ import unicode_literals
 import plac
 import json
-from os import path
-import shutil
-import os
 import random
-import io
 import pathlib
 
 from spacy.tokens import Doc
 from spacy.syntax.nonproj import PseudoProjectivity
 from spacy.language import Language
 from spacy.gold import GoldParse
-from spacy.vocab import Vocab
 from spacy.tagger import Tagger
 from spacy.pipeline import DependencyParser, BeamDependencyParser
 from spacy.syntax.parser import get_templates
@@ -23,7 +18,6 @@ import spacy.attrs
 import io
 
 
-
 def read_conllx(loc, n=0):
     with io.open(loc, 'r', encoding='utf8') as file_:
         text = file_.read()
@@ -35,7 +29,8 @@ def read_conllx(loc, n=0):
                 lines.pop(0)
             tokens = []
             for line in lines:
-                id_, word, lemma, pos, tag, morph, head, dep, _1, _2 = line.split()
+                id_, word, lemma, pos, tag, morph, head, dep, _1, \
+                _2 = line.split('\t')
                 if '-' in id_ or '.' in id_:
                     continue
                 try:

From 08346dba1a94989c6a286e51a122a0f2661592d3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Rapha=C3=ABl=20Bournhonesque?= <raphael@bournhonesque.eu>
Date: Tue, 21 Mar 2017 23:18:54 +0100
Subject: [PATCH 26/30] Use specific language class instead of base Language
 class

---
 bin/parser/train_ud.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bin/parser/train_ud.py b/bin/parser/train_ud.py
index 98a93dd88..afc4491cb 100644
--- a/bin/parser/train_ud.py
+++ b/bin/parser/train_ud.py
@@ -129,7 +129,7 @@ def main(lang_name, train_loc, dev_loc, model_dir, clusters_loc=None):
         random.shuffle(train_sents)
         scorer = score_model(vocab, tagger, parser, read_conllx(dev_loc))
         print('%d:\t%.3f\t%.3f\t%.3f' % (itn, loss, scorer.uas, scorer.tags_acc))
-    nlp = Language(vocab=vocab, tagger=tagger, parser=parser)
+    nlp = LangClass(vocab=vocab, tagger=tagger, parser=parser)
     nlp.end_training(model_dir)
     scorer = score_model(vocab, tagger, parser, read_conllx(dev_loc))
     print('%d:\t%.3f\t%.3f\t%.3f' % (itn, scorer.uas, scorer.las, scorer.tags_acc))

From 07199c3e8b1f7f91c41e7d19f364c902d3e9590b Mon Sep 17 00:00:00 2001
From: Andrew Poliakov <pavlin99th@me.com>
Date: Wed, 22 Mar 2017 11:43:22 +0300
Subject: [PATCH 27/30] Fix infinite recursion in spacy.info

---
 spacy/__init__.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/spacy/__init__.py b/spacy/__init__.py
index 70b3363d6..62ab41c90 100644
--- a/spacy/__init__.py
+++ b/spacy/__init__.py
@@ -49,7 +49,3 @@ def load(name, **overrides):
         overrides['path'] = model_path
 
     return cls(**overrides)
-
-
-def info(name, markdown):
-    info(name, markdown)

From ce065e5d65bc2a880d9e1993129b6beff6468c39 Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Wed, 22 Mar 2017 10:02:14 +0100
Subject: [PATCH 28/30] Fix imports

---
 spacy/__init__.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/spacy/__init__.py b/spacy/__init__.py
index 70b3363d6..80bd1c539 100644
--- a/spacy/__init__.py
+++ b/spacy/__init__.py
@@ -5,7 +5,7 @@ import json
 from pathlib import Path
 from .util import set_lang_class, get_lang_class, parse_package_meta
 from .deprecated import resolve_model_name
-from .cli.info import info
+from .cli import info
 
 from . import en
 from . import de
@@ -49,7 +49,3 @@ def load(name, **overrides):
         overrides['path'] = model_path
 
     return cls(**overrides)
-
-
-def info(name, markdown):
-    info(name, markdown)

From 4a9a1126a4aabfeb20fe555c042d333b1d6c982f Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Wed, 22 Mar 2017 10:02:59 +0100
Subject: [PATCH 29/30] Update syntax highlighting color scheme

---
 website/assets/css/_variables.sass | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/website/assets/css/_variables.sass b/website/assets/css/_variables.sass
index bfef915be..1c38d114a 100644
--- a/website/assets/css/_variables.sass
+++ b/website/assets/css/_variables.sass
@@ -44,7 +44,7 @@ $color-red: #d9515d
 $color-green: #3ec930
 $color-yellow: #f4c025
 
-$syntax-highlighting: ( comment: #949e9b, tag: #3ec930, number: #B084EB, selector: #FFB86C, operator: #FF2C6D, function: #09a3d5, keyword: #45A9F9, regex: #f4c025 )
+$syntax-highlighting: ( comment: #949e9b, tag: #b084eb, number: #b084eb, selector: #ffb86c, operator: #ff2c6d, function: #35b3dc, keyword: #45a9f9, regex: #f4c025 )
 
 $pattern: $color-theme url("/assets/img/pattern_#{$theme}.jpg") center top repeat
 $pattern-overlay: transparent url("/assets/img/pattern_landing.jpg") center -138px no-repeat

From 8bc05c2ba97dd51fa9a066def0ab82a97ca55d11 Mon Sep 17 00:00:00 2001
From: ines <ines@ines.io>
Date: Thu, 23 Mar 2017 11:07:59 +0100
Subject: [PATCH 30/30] Delete old training scripts (resolves #911)

---
 bin/parser/conll_parse.py | 130 -------------------
 bin/parser/nn_train.py    | 261 --------------------------------------
 2 files changed, 391 deletions(-)
 delete mode 100644 bin/parser/conll_parse.py
 delete mode 100755 bin/parser/nn_train.py

diff --git a/bin/parser/conll_parse.py b/bin/parser/conll_parse.py
deleted file mode 100644
index 85a81c432..000000000
--- a/bin/parser/conll_parse.py
+++ /dev/null
@@ -1,130 +0,0 @@
-#!/usr/bin/env python
-from __future__ import division
-from __future__ import unicode_literals
-
-import os
-from os import path
-import shutil
-import codecs
-import random
-import time
-import gzip
-
-import plac
-import cProfile
-import pstats
-
-import spacy.util
-from spacy.en import English
-from spacy.en.pos import POS_TEMPLATES, POS_TAGS, setup_model_dir
-
-from spacy.syntax.parser import GreedyParser
-from spacy.syntax.parser import OracleError
-from spacy.syntax.util import Config
-
-
-def is_punct_label(label):
-    return label == 'P' or label.lower() == 'punct'
-
-
-def read_gold(file_):
-    """Read a standard CoNLL/MALT-style format"""
-    sents = []
-    for sent_str in file_.read().strip().split('\n\n'):
-        ids = []
-        words = []
-        heads = []
-        labels = []
-        tags = []
-        for i, line in enumerate(sent_str.split('\n')):
-            id_, word, pos_string, head_idx, label = _parse_line(line)
-            words.append(word)
-            if head_idx == -1:
-                head_idx = i
-            ids.append(id_)
-            heads.append(head_idx)
-            labels.append(label)
-            tags.append(pos_string)
-        text = ' '.join(words)
-        sents.append((text, [words], ids, words, tags, heads, labels))
-    return sents
-
-
-def _parse_line(line):
-    pieces = line.split()
-    id_ = int(pieces[0])
-    word = pieces[1]
-    pos = pieces[3]
-    head_idx = int(pieces[6])
-    label = pieces[7]
-    return id_, word, pos, head_idx, label
-
-        
-def iter_data(paragraphs, tokenizer, gold_preproc=False):
-    for raw, tokenized, ids, words, tags, heads, labels in paragraphs:
-        assert len(words) == len(heads)
-        for words in tokenized:
-            sent_ids = ids[:len(words)]
-            sent_tags = tags[:len(words)]
-            sent_heads = heads[:len(words)]
-            sent_labels = labels[:len(words)]
-            sent_heads = _map_indices_to_tokens(sent_ids, sent_heads)
-            tokens = tokenizer.tokens_from_list(words)
-            yield tokens, sent_tags, sent_heads, sent_labels
-            ids = ids[len(words):]
-            tags = tags[len(words):]
-            heads = heads[len(words):]
-            labels = labels[len(words):]
-
-
-def _map_indices_to_tokens(ids, heads):
-    mapped = []
-    for head in heads:
-        if head not in ids:
-            mapped.append(None)
-        else:
-            mapped.append(ids.index(head))
-    return mapped
-
-
-
-def evaluate(Language, dev_loc, model_dir):
-    global loss
-    nlp = Language()
-    n_corr = 0
-    pos_corr = 0
-    n_tokens = 0
-    total = 0
-    skipped = 0
-    loss = 0
-    with codecs.open(dev_loc, 'r', 'utf8') as file_:
-        paragraphs = read_gold(file_)
-    for tokens, tag_strs, heads, labels in iter_data(paragraphs, nlp.tokenizer):
-        assert len(tokens) == len(labels)
-        nlp.tagger.tag_from_strings(tokens, tag_strs)
-        nlp.parser(tokens)
-        for i, token in enumerate(tokens):
-            try:
-                pos_corr += token.tag_ == tag_strs[i]
-            except:
-                print i, token.orth_, token.tag
-                raise
-            n_tokens += 1
-            if heads[i] is None:
-                skipped += 1
-                continue
-            if is_punct_label(labels[i]):
-                continue
-            n_corr += token.head.i == heads[i]
-            total += 1
-    print loss, skipped, (loss+skipped + total)
-    print pos_corr / n_tokens
-    return float(n_corr) / (total + loss)
-
-
-def main(dev_loc, model_dir):
-    print evaluate(English, dev_loc, model_dir)
-    
-
-if __name__ == '__main__':
-    plac.call(main)
diff --git a/bin/parser/nn_train.py b/bin/parser/nn_train.py
deleted file mode 100755
index 72c9e04f1..000000000
--- a/bin/parser/nn_train.py
+++ /dev/null
@@ -1,261 +0,0 @@
-#!/usr/bin/env python
-from __future__ import division
-from __future__ import unicode_literals
-
-import os
-from os import path
-import shutil
-import codecs
-import random
-
-import plac
-import cProfile
-import pstats
-import re
-
-import spacy.util
-from spacy.en import English
-from spacy.en.pos import POS_TEMPLATES, POS_TAGS, setup_model_dir
-
-from spacy.syntax.util import Config
-from spacy.gold import read_json_file
-from spacy.gold import GoldParse
-
-from spacy.scorer import Scorer
-
-from spacy.syntax.parser import Parser, get_templates
-from spacy._theano import TheanoModel
-
-import theano
-import theano.tensor as T
-
-from theano.printing import Print
-
-import numpy
-from collections import OrderedDict, defaultdict
-
-
-theano.config.profile = False
-theano.config.floatX = 'float32'
-floatX = theano.config.floatX
-
-
-def L1(L1_reg, *weights):
-    return L1_reg * sum(abs(w).sum() for w in weights)
-
-
-def L2(L2_reg, *weights):
-    return L2_reg * sum((w ** 2).sum() for w in weights)
-
-
-def rms_prop(loss, params, eta=1.0, rho=0.9, eps=1e-6):
-    updates = OrderedDict()
-    for param in params:
-        value = param.get_value(borrow=True)
-        accu = theano.shared(np.zeros(value.shape, dtype=value.dtype),
-                             broadcastable=param.broadcastable)
-
-        grad = T.grad(loss, param)
-        accu_new = rho * accu + (1 - rho) * grad ** 2
-        updates[accu] = accu_new
-        updates[param] = param - (eta * grad / T.sqrt(accu_new + eps))
-    return updates
-
-
-def relu(x):
-    return x * (x > 0)
-
-
-def feed_layer(activation, weights, bias, input_):
-    return activation(T.dot(input_, weights) + bias)
-
-
-def init_weights(n_in, n_out):
-    rng = numpy.random.RandomState(1235)
-    
-    weights = numpy.asarray(
-        rng.standard_normal(size=(n_in, n_out)) * numpy.sqrt(2.0 / n_in),
-        dtype=theano.config.floatX
-    )
-    bias = numpy.zeros((n_out,), dtype=theano.config.floatX)
-    return [wrapper(weights, name='W'), wrapper(bias, name='b')]
-
-
-def compile_model(n_classes, n_hidden, n_in, optimizer):
-    x = T.vector('x') 
-    costs = T.ivector('costs')
-    loss = T.scalar('loss')
-
-    maxent_W, maxent_b = init_weights(n_hidden, n_classes)
-    hidden_W, hidden_b = init_weights(n_in, n_hidden)
-
-    # Feed the inputs forward through the network
-    p_y_given_x = feed_layer(
-                    T.nnet.softmax,
-                    maxent_W,
-                    maxent_b,
-                      feed_layer(
-                        relu,
-                        hidden_W,
-                        hidden_b,
-                        x))
-
-    loss = -T.log(T.sum(p_y_given_x[0] * T.eq(costs, 0)) + 1e-8)
-
-    train_model = theano.function(
-        name='train_model',
-        inputs=[x, costs],
-        outputs=[p_y_given_x[0], T.grad(loss, x), loss],
-        updates=optimizer(loss, [maxent_W, maxent_b, hidden_W, hidden_b]),
-        on_unused_input='warn'
-    )
-
-    evaluate_model = theano.function(
-        name='evaluate_model',
-        inputs=[x],
-        outputs=[
-            feed_layer(
-              T.nnet.softmax,
-              maxent_W,
-              maxent_b,
-              feed_layer(
-                relu,
-                hidden_W,
-                hidden_b,
-                x
-              )
-            )[0]
-        ]
-    )
-    return train_model, evaluate_model
-
-
-def score_model(scorer, nlp, annot_tuples, verbose=False):
-    tokens = nlp.tokenizer.tokens_from_list(annot_tuples[1])
-    nlp.tagger(tokens)
-    nlp.parser(tokens)
-    gold = GoldParse(tokens, annot_tuples)
-    scorer.score(tokens, gold, verbose=verbose)
-
-
-def train(Language, gold_tuples, model_dir, n_iter=15, feat_set=u'basic',
-          eta=0.01, mu=0.9, nv_hidden=100, nv_word=10, nv_tag=10, nv_label=10,
-          seed=0, n_sents=0,  verbose=False):
-
-    dep_model_dir = path.join(model_dir, 'deps')
-    pos_model_dir = path.join(model_dir, 'pos')
-    if path.exists(dep_model_dir):
-        shutil.rmtree(dep_model_dir)
-    if path.exists(pos_model_dir):
-        shutil.rmtree(pos_model_dir)
-    os.mkdir(dep_model_dir)
-    os.mkdir(pos_model_dir)
-    setup_model_dir(sorted(POS_TAGS.keys()), POS_TAGS, POS_TEMPLATES, pos_model_dir)
-
-    Config.write(dep_model_dir, 'config',
-        seed=seed,
-        templates=tuple(),
-        labels=Language.ParserTransitionSystem.get_labels(gold_tuples),
-        vector_lengths=(nv_word, nv_tag, nv_label),
-        hidden_nodes=nv_hidden,
-        eta=eta,
-        mu=mu
-    )
-  
-    # Bake-in hyper-parameters
-    optimizer = lambda loss, params: rms_prop(loss, params, eta=eta, rho=rho, eps=eps)
-    nlp = Language(data_dir=model_dir)
-    n_classes = nlp.parser.model.n_classes
-    train, predict = compile_model(n_classes, nv_hidden, n_in, optimizer)
-    nlp.parser.model = TheanoModel(n_classes, input_spec, train,
-                                   predict, model_loc)
- 
-    if n_sents > 0:
-        gold_tuples = gold_tuples[:n_sents]
-    print "Itn.\tP.Loss\tUAS\tTag %\tToken %"
-    log_loc = path.join(model_dir, 'job.log')
-    for itn in range(n_iter):
-        scorer = Scorer()
-        loss = 0
-        for _, sents in gold_tuples:
-            for annot_tuples, ctnt in sents:
-                if len(annot_tuples[1]) == 1:
-                    continue
-                score_model(scorer, nlp, annot_tuples)
-                tokens = nlp.tokenizer.tokens_from_list(annot_tuples[1])
-                nlp.tagger(tokens)
-                gold = GoldParse(tokens, annot_tuples, make_projective=True)
-                assert gold.is_projective
-                loss += nlp.parser.train(tokens, gold)
-                nlp.tagger.train(tokens, gold.tags)
-        random.shuffle(gold_tuples)
-        logline = '%d:\t%d\t%.3f\t%.3f\t%.3f' % (itn, loss, scorer.uas,
-                                                 scorer.tags_acc,
-                                                 scorer.token_acc)
-        print logline
-        with open(log_loc, 'aw') as file_:
-            file_.write(logline + '\n')
-    nlp.parser.model.end_training()
-    nlp.tagger.model.end_training()
-    nlp.vocab.strings.dump(path.join(model_dir, 'vocab', 'strings.txt'))
-    return nlp
-
-
-def evaluate(nlp, gold_tuples, gold_preproc=True):
-    scorer = Scorer()
-    for raw_text, sents in gold_tuples:
-        for annot_tuples, brackets in sents:
-            tokens = nlp.tokenizer.tokens_from_list(annot_tuples[1])
-            nlp.tagger(tokens)
-            nlp.parser(tokens)
-            gold = GoldParse(tokens, annot_tuples)
-            scorer.score(tokens, gold)
-    return scorer
-
-
-@plac.annotations(
-    train_loc=("Location of training file or directory"),
-    dev_loc=("Location of development file or directory"),
-    model_dir=("Location of output model directory",),
-    eval_only=("Skip training, and only evaluate", "flag", "e", bool),
-    n_sents=("Number of training sentences", "option", "n", int),
-    n_iter=("Number of training iterations", "option", "i", int),
-    verbose=("Verbose error reporting", "flag", "v", bool),
-
-    nv_word=("Word vector length", "option", "W", int),
-    nv_tag=("Tag vector length", "option", "T", int),
-    nv_label=("Label vector length", "option", "L", int),
-    nv_hidden=("Hidden nodes length", "option", "H", int),
-    eta=("Learning rate", "option", "E", float),
-    mu=("Momentum", "option", "M", float),
-)
-def main(train_loc, dev_loc, model_dir, n_sents=0, n_iter=15, verbose=False,
-         nv_word=10, nv_tag=10, nv_label=10, nv_hidden=10,
-         eta=0.1, mu=0.9, eval_only=False):
-
-
-
-
-    gold_train = list(read_json_file(train_loc, lambda doc: 'wsj' in doc['id']))
-
-    nlp = train(English, gold_train, model_dir,
-               feat_set='embed',
-               eta=eta, mu=mu,
-               nv_word=nv_word, nv_tag=nv_tag, nv_label=nv_label, nv_hidden=nv_hidden,
-               n_sents=n_sents, n_iter=n_iter,
-               verbose=verbose)
-
-    scorer = evaluate(nlp, list(read_json_file(dev_loc)))
-    
-    print 'TOK', 100-scorer.token_acc
-    print 'POS', scorer.tags_acc
-    print 'UAS', scorer.uas
-    print 'LAS', scorer.las
-
-    print 'NER P', scorer.ents_p
-    print 'NER R', scorer.ents_r
-    print 'NER F', scorer.ents_f
-
-
-if __name__ == '__main__':
-    plac.call(main)