diff --git a/requirements.txt b/requirements.txt
index c6d43ddd7..a3404b0f5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,3 @@
-cython>=0.25
 numpy>=1.15.0
 cymem>=2.0.2,<2.1.0
 preshed>=2.0.1,<2.1.0
@@ -15,6 +14,7 @@ jsonschema>=2.6.0,<3.0.0
 wasabi>=0.0.8,<1.1.0
 pathlib==1.0.1; python_version < "3.4"
 # Development dependencies
+cython>=0.25
 pytest>=4.0.0,<5.0.0
 pytest-timeout>=1.3.0,<2.0.0
 mock>=2.0.0,<3.0.0
diff --git a/spacy/cli/info.py b/spacy/cli/info.py
index 90387f9f7..5df9ddadb 100644
--- a/spacy/cli/info.py
+++ b/spacy/cli/info.py
@@ -7,7 +7,7 @@ from pathlib import Path
 from wasabi import Printer
 
 from ._messages import Messages
-from ..compat import path2str
+from ..compat import path2str, basestring_, unicode_
 from .. import util
 from .. import about
 
@@ -44,7 +44,7 @@ def info(model=None, markdown=False, silent=False):
                 k: v for k, v in meta.items() if k not in ("accuracy", "speed")
             }
             if markdown:
-                util.print_markdown(model_meta, title=title)
+                print_markdown(model_meta, title=title)
             else:
                 msg.table(model_meta, title=title)
         return meta
@@ -58,7 +58,7 @@ def info(model=None, markdown=False, silent=False):
     if not silent:
         title = "Info about spaCy"
         if markdown:
-            util.print_markdown(data, title=title)
+            print_markdown(data, title=title)
         else:
             msg.table(data, title=title)
     return data
@@ -75,3 +75,19 @@ def list_models():
         models = [f.parts[-1] for f in data_path.iterdir() if f.is_dir()]
         return ", ".join([m for m in models if not exclude_dir(m)])
     return "-"
+
+
+def print_markdown(data, title=None):
+    """Print data in GitHub-flavoured Markdown format for issues etc.
+
+    data (dict or list of tuples): Label/value pairs; existing paths skipped.
+    title (unicode or None): Title, will be rendered as headline 2.
+    """
+    pairs = data.items() if isinstance(data, dict) else data
+    markdown = []
+    for key, value in pairs:
+        if not (isinstance(value, basestring_) and Path(value).exists()):
+            markdown.append("* **{}:** {}".format(key, unicode_(value)))
+    if title:
+        print("\n## {}".format(title))
+    print("\n{}\n".format("\n".join(markdown)))
diff --git a/spacy/cli/pretrain.py b/spacy/cli/pretrain.py
index 80e60a871..20d097047 100644
--- a/spacy/cli/pretrain.py
+++ b/spacy/cli/pretrain.py
@@ -160,7 +160,7 @@ def make_update(model, docs, optimizer, drop=0.0):
     return loss
 
 
-def make_docs(nlp, batch):
+def make_docs(nlp, batch, min_length=1, max_length=500):
     docs = []
     for record in batch:
         text = record["text"]
@@ -173,7 +173,7 @@ def make_docs(nlp, batch):
             heads = numpy.asarray(heads, dtype="uint64")
             heads = heads.reshape((len(doc), 1))
             doc = doc.from_array([HEAD], heads)
-        if len(doc) >= 1 and len(doc) < 200:
+        if len(doc) >= min_length and len(doc) < max_length:
             docs.append(doc)
     return docs
 
diff --git a/spacy/cli/train.py b/spacy/cli/train.py
index d49b94e44..9dec5d4bd 100644
--- a/spacy/cli/train.py
+++ b/spacy/cli/train.py
@@ -22,13 +22,13 @@ from .. import about
 # Batch size starts at 1 and grows, so that we make updates quickly
 # at the beginning of training.
 dropout_rates = util.decaying(
-    util.env_opt("dropout_from", 0.2),
-    util.env_opt("dropout_to", 0.2),
+    util.env_opt("dropout_from", 0.1),
+    util.env_opt("dropout_to", 0.1),
     util.env_opt("dropout_decay", 0.0),
 )
 batch_sizes = util.compounding(
-    util.env_opt("batch_from", 1000),
-    util.env_opt("batch_to", 1000),
+    util.env_opt("batch_from", 750),
+    util.env_opt("batch_to", 750),
     util.env_opt("batch_compound", 1.001),
 )
 
@@ -144,24 +144,6 @@ def train(
     if learn_tokens:
         nlp.add_pipe(nlp.create_pipe("merge_subtokens"))
 
-    # Take dropout and batch size as generators of values -- dropout
-    # starts high and decays sharply, to force the optimizer to explore.
-    # Batch size starts at 1 and grows, so that we make updates quickly
-    # at the beginning of training.
-    dropout_rates = util.decaying(
-        util.env_opt("dropout_from", 0.1),
-        util.env_opt("dropout_to", 0.1),
-        util.env_opt("dropout_decay", 0.0),
-    )
-    batch_sizes = util.compounding(
-        util.env_opt("batch_from", 750),
-        util.env_opt("batch_to", 750),
-        util.env_opt("batch_compound", 1.001),
-    )
-    lang_class = util.get_lang_class(lang)
-    nlp = lang_class()
-    meta["pipeline"] = pipeline
-    nlp.meta.update(meta)
     if vectors:
         msg.text(Messages.M058.format(model=vectors))
         _load_vectors(nlp, vectors)
@@ -187,6 +169,7 @@ def train(
     else:
         # Start with a blank model, call begin_training
         optimizer = nlp.begin_training(lambda: corpus.train_tuples, device=use_gpu)
+
     nlp._optimizer = None
 
     # Load in pre-trained weights
diff --git a/spacy/gold.pyx b/spacy/gold.pyx
index 0f25d7f53..26ff9753a 100644
--- a/spacy/gold.pyx
+++ b/spacy/gold.pyx
@@ -265,7 +265,7 @@ def _corrupt(c, noise_level):
         return '\n'
     elif c == '\n':
         return ' '
-    elif c in ['.', "'", "!", "?"]:
+    elif c in ['.', "'", "!", "?", ',']:
         return ''
     else:
         return c.lower()
diff --git a/spacy/pipeline.pyx b/spacy/pipeline.pyx
index 62d958ef2..3a09af644 100644
--- a/spacy/pipeline.pyx
+++ b/spacy/pipeline.pyx
@@ -13,7 +13,8 @@ from .util import msgpack
 from .util import msgpack_numpy
 
 from thinc.api import chain
-from thinc.v2v import Affine, SELU, Softmax
+from thinc.v2v import Affine, Maxout, Softmax
+from thinc.misc import LayerNorm
 from thinc.t2v import Pooling, max_pool, mean_pool
 from thinc.neural.util import to_categorical, copy_array
 from thinc.neural._classes.difference import Siamese, CauchySimilarity
@@ -442,7 +443,7 @@ class Tensorizer(Pipe):
         **cfg: Config parameters.
         RETURNS (Model): A `thinc.neural.Model` or similar instance.
         """
-        input_size = util.env_opt('token_vector_width', cfg.get('input_size', 128))
+        input_size = util.env_opt('token_vector_width', cfg.get('input_size', 96))
         return zero_init(Affine(output_size, input_size, drop_factor=0.0))
 
     def __init__(self, vocab, model=True, **cfg):
@@ -879,9 +880,10 @@ class MultitaskObjective(Tagger):
     @classmethod
     def Model(cls, n_tags, tok2vec=None, **cfg):
         token_vector_width = util.env_opt('token_vector_width', 96)
-        softmax = Softmax(n_tags, token_vector_width)
+        softmax = Softmax(n_tags, token_vector_width*2)
         model = chain(
             tok2vec,
+            LayerNorm(Maxout(token_vector_width*2, token_vector_width, pieces=3)),
             softmax
         )
         model.tok2vec = tok2vec
diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx
index 6e949fc35..0663c1289 100644
--- a/spacy/syntax/nn_parser.pyx
+++ b/spacy/syntax/nn_parser.pyx
@@ -510,7 +510,7 @@ cdef class Parser:
         for action, label_freqs in previous_labels.items():
             for label in label_freqs:
                 self.moves.add_action(action, label)
-        cfg.setdefault('token_vector_width', 128)
+        cfg.setdefault('token_vector_width', 96)
         if self.model is True:
             self.model, cfg = self.Model(self.moves.n_moves, **cfg)
             if sgd is None:
diff --git a/spacy/util.py b/spacy/util.py
index a070e3045..d8c82da89 100644
--- a/spacy/util.py
+++ b/spacy/util.py
@@ -18,7 +18,7 @@ import numpy.random
 
 
 from .symbols import ORTH
-from .compat import cupy, CudaStream, path2str, basestring_, input_, unicode_
+from .compat import cupy, CudaStream, path2str, basestring_, unicode_
 from .compat import import_file, json_dumps
 from .errors import Errors
 
@@ -587,19 +587,6 @@ def is_json_serializable(obj):
         return False
 
 
-def get_raw_input(description, default=False):
-    """Get user input from the command line via raw_input / input.
-
-    description (unicode): Text to display before prompt.
-    default (unicode or False/None): Default value to display with prompt.
-    RETURNS (unicode): User input.
-    """
-    additional = " (default: %s)" % default if default else ""
-    prompt = "    %s%s: " % (description, additional)
-    user_input = input_(prompt)
-    return user_input
-
-
 def to_bytes(getters, exclude):
     serialized = OrderedDict()
     for key, getter in getters.items():
@@ -634,27 +621,6 @@ def from_disk(path, readers, exclude):
     return path
 
 
-def print_markdown(data, title=None):
-    """Print data in GitHub-flavoured Markdown format for issues etc.
-
-    data (dict or list of tuples): Label/value pairs.
-    title (unicode or None): Title, will be rendered as headline 2.
-    """
-
-    def excl_value(value):
-        # contains path, i.e. personal info
-        return isinstance(value, basestring_) and Path(value).exists()
-
-    if isinstance(data, dict):
-        data = list(data.items())
-    markdown = [
-        "* **{}:** {}".format(l, unicode_(v)) for l, v in data if not excl_value(v)
-    ]
-    if title:
-        print("\n## {}".format(title))
-    print("\n{}\n".format("\n".join(markdown)))
-
-
 def minify_html(html):
     """Perform a template-specific, rudimentary HTML minification for displaCy.
     Disclaimer: NOT a general-purpose solution, only removes indentation and