From 3a8d9b37a6fe968f92d82e94c5caa57885b96962 Mon Sep 17 00:00:00 2001 From: Jordan Suchow Date: Sun, 19 Apr 2015 01:31:31 -0700 Subject: [PATCH] Remove trailing whitespace --- README.md | 1 - bin/ner_tag.py | 2 -- bin/parser/train.py | 2 +- bin/prepare_vecs.py | 2 +- contributors/cla.md | 2 -- dev_setup.py | 2 +- docs/source/api.rst | 24 ++++++++++++------------ docs/source/features.rst | 1 - docs/source/guide/overview.rst | 1 - docs/source/howworks.rst | 10 ++++------ docs/source/index.rst | 18 +++++++++--------- docs/source/lexrank_tutorial.rst | 15 ++++++--------- docs/source/license.rst | 8 ++++---- docs/source/quickstart.rst | 13 ++++++------- docs/source/updates.rst | 6 +++--- lang_data/en/morphs.json | 2 +- setup.py | 2 +- spacy/_ml.pxd | 2 +- spacy/attrs.pxd | 2 -- spacy/en/__init__.py | 8 ++++---- spacy/en/download.py | 2 +- spacy/en/pos.pxd | 1 - spacy/en/pos.pyx | 3 +-- spacy/lexeme.pxd | 2 +- spacy/lexeme.pyx | 8 ++++---- spacy/ner/_feats.pyx | 4 ++-- spacy/ner/annot.pyx | 2 +- spacy/ner/bilou_moves.pyx | 6 +++--- spacy/ner/context.pxd | 2 -- spacy/ner/context.pyx | 2 +- spacy/ner/feats.pyx | 4 ++-- spacy/ner/greedy_parser.pyx | 2 +- spacy/ner/io_moves.pxd | 2 +- spacy/ner/io_moves.pyx | 4 ++-- spacy/ner/pystate.pxd | 2 +- spacy/scorer.py | 2 +- spacy/spans.pxd | 3 +-- spacy/structs.pxd | 4 ++-- spacy/syntax/_parse_features.pxd | 14 +++++++------- spacy/syntax/_parse_features.pyx | 14 +++++++------- spacy/syntax/_state.pyx | 2 +- spacy/syntax/arc_eager.pxd | 2 +- spacy/syntax/ner.pyx | 2 -- spacy/syntax/parser.pyx | 4 ++-- spacy/syntax/transition_system.pxd | 6 +++--- spacy/syntax/util.py | 2 -- spacy/tokenizer.pyx | 4 ++-- spacy/tokens.pxd | 6 +++--- spacy/tokens.pyx | 11 +++++------ spacy/typedefs.pxd | 2 -- spacy/util.py | 4 ++-- spacy/vocab.pxd | 3 +-- spacy/vocab.pyx | 4 ++-- tests/sun.tokens | 6 +++--- tests/test_align.py | 3 --- tests/test_array.py | 2 -- tests/test_docs.py | 2 +- tests/test_merge.py | 1 - tests/test_number.py | 1 - tests/test_parse_navigate.py | 1 - tests/test_post_punct.py | 1 - tests/test_string_loading.py | 3 --- tests/test_subtree.py | 1 - tests/test_token_api.py | 2 -- tests/test_tokenizer.py | 8 ++++---- tests/test_whitespace.py | 2 -- tests/tokenizer.sed | 2 +- 67 files changed, 124 insertions(+), 169 deletions(-) diff --git a/README.md b/README.md index 29a82cd75..95afcb8ae 100644 --- a/README.md +++ b/README.md @@ -35,4 +35,3 @@ Difficult to support: * PyPy 2.7 * PyPy 3.4 - diff --git a/bin/ner_tag.py b/bin/ner_tag.py index e7ec1e51e..34588bd12 100644 --- a/bin/ner_tag.py +++ b/bin/ner_tag.py @@ -30,5 +30,3 @@ def main(text_loc): if __name__ == '__main__': plac.call(main) - - diff --git a/bin/parser/train.py b/bin/parser/train.py index 48069b829..9ae3a3267 100755 --- a/bin/parser/train.py +++ b/bin/parser/train.py @@ -132,7 +132,7 @@ def main(train_loc, dev_loc, model_dir, n_sents=0, out_loc="", verbose=False, print 'NER P', scorer.ents_p print 'NER R', scorer.ents_r print 'NER F', scorer.ents_f - + if __name__ == '__main__': plac.call(main) diff --git a/bin/prepare_vecs.py b/bin/prepare_vecs.py index 3262f7ae6..b55dafee3 100644 --- a/bin/prepare_vecs.py +++ b/bin/prepare_vecs.py @@ -7,7 +7,7 @@ from spacy.vocab import write_binary_vectors def main(in_loc, out_loc): write_binary_vectors(in_loc, out_loc) - + if __name__ == '__main__': plac.call(main) diff --git a/contributors/cla.md b/contributors/cla.md index cc63ca444..007739a1a 100644 --- a/contributors/cla.md +++ b/contributors/cla.md @@ -11,5 +11,3 @@ The CLA must be signed on your first 
pull request. To do this, simply fill in th $ git add -A spaCy/contributors/.md Now finish your pull request, and you're done. - - diff --git a/dev_setup.py b/dev_setup.py index 3b8fc9f73..8d2f84360 100644 --- a/dev_setup.py +++ b/dev_setup.py @@ -102,7 +102,7 @@ exts = [ Extension("spacy.syntax.arc_eager", ["spacy/syntax/arc_eager.pyx"], **ext_args), Extension("spacy.syntax._parse_features", ["spacy/syntax/_parse_features.pyx"], **ext_args) - + #Extension("spacy.pos_feats", ["spacy/pos_feats.pyx"], language="c++", include_dirs=includes), #Extension("spacy.ner._state", ["spacy/ner/_state.pyx"], language="c++", include_dirs=includes), #Extension("spacy.ner.bilou_moves", ["spacy/ner/bilou_moves.pyx"], language="c++", include_dirs=includes), diff --git a/docs/source/api.rst b/docs/source/api.rst index e8638ed55..808204e65 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -28,7 +28,7 @@ API .. autoclass:: spacy.tokens.Tokens - + +---------------+-------------+-------------+ | Attribute | Type | Attr API | +===============+=============+=============+ @@ -48,7 +48,7 @@ API For faster access, the underlying C data can be accessed from Cython. You can also export the data to a numpy array, via `Tokens.to_array`, if pure Python access is required, and you need slightly better performance. However, this - is both slower and has a worse API than Cython access. + is both slower and has a worse API than Cython access. .. autoclass:: spacy.tokens.Token @@ -119,7 +119,7 @@ API shape A transform of the word's string, to show orthographic features. The - characters a-z are mapped to x, A-Z is mapped to X, 0-9 is mapped to d. + characters a-z are mapped to x, A-Z is mapped to X, 0-9 is mapped to d. After these mappings, sequences of 4 or more of the same character are truncated to length 4. Examples: C3Po --> XdXx, favorite --> xxxx, :) --> :) @@ -161,7 +161,7 @@ API pos A part-of-speech tag, from the Google Universal Tag Set, e.g. NOUN, VERB, ADV. Constants for the 17 tag values are provided in spacy.parts\_of\_speech. - + dep The type of syntactic dependency relation between the word and its syntactic head. @@ -185,10 +185,10 @@ API rights An iterator for the immediate rightward syntactic children of the word. - + children An iterator that yields from lefts, and then yields from rights. - + subtree An iterator for the part of the sentence syntactically governed by the word, including the word itself. @@ -205,15 +205,15 @@ API .. py:class:: vocab.Vocab(self, data_dir=None, lex_props_getter=None) .. py:method:: __len__(self) --> int - + .. py:method:: __getitem__(self, id: int) --> unicode - + .. py:method:: __getitem__(self, string: unicode) --> int - + .. py:method:: __setitem__(self, py_str: unicode, props: Dict[str, int[float]) --> None .. py:method:: dump(self, loc: unicode) --> None - + .. py:method:: load_lexemes(self, loc: unicode) --> None .. py:method:: load_vectors(self, loc: unicode) --> None @@ -223,9 +223,9 @@ API .. py:method:: __len__(self) --> int .. py:method:: __getitem__(self, id: int) --> unicode - + .. py:method:: __getitem__(self, string: bytes) --> id - + .. py:method:: __getitem__(self, string: unicode) --> id .. 
py:method:: dump(self, loc: unicode) --> None diff --git a/docs/source/features.rst b/docs/source/features.rst index 3e586dd68..1643ad2bb 100644 --- a/docs/source/features.rst +++ b/docs/source/features.rst @@ -75,4 +75,3 @@ Boolean features +-------------+--------------------------------------------------------------+ | IN_LIST | Facility for loading arbitrary run-time word lists? | +-------------+--------------------------------------------------------------+ - diff --git a/docs/source/guide/overview.rst b/docs/source/guide/overview.rst index 59d0810d8..7e1b34558 100644 --- a/docs/source/guide/overview.rst +++ b/docs/source/guide/overview.rst @@ -68,4 +68,3 @@ Cons: - Higher memory usage (up to 1gb) - More conceptually complicated - Tokenization rules expressed in code, not as data - diff --git a/docs/source/howworks.rst b/docs/source/howworks.rst index 5538988d1..3abc2ef05 100644 --- a/docs/source/howworks.rst +++ b/docs/source/howworks.rst @@ -122,7 +122,7 @@ it is, we stop splitting, and return the tokenization at that point. The advantage of this design is that the prefixes, suffixes and special-cases can be declared separately, in easy-to-understand files. If a new entry is added to the special-cases, you can be sure that it won't have some unforeseen -consequence to a complicated regular-expression grammar. +consequence to a complicated regular-expression grammar. Coupling the Tokenizer and Lexicon ################################## @@ -159,7 +159,7 @@ Dependency Parser The parser uses the algorithm described in my `2014 blog post`_. This algorithm, shift-reduce dependency parsing, is becoming widely adopted due -to its compelling speed/accuracy trade-off. +to its compelling speed/accuracy trade-off. Some quick details about spaCy's take on this, for those who happen to know these models well. I'll write up a better description shortly. @@ -176,7 +176,7 @@ scored 91.0. So how have I gotten it to 92.4? The following tweaks: 1. I use Brown cluster features --- these help a lot; 2. I redesigned the feature set. I've long known that the Zhang and Nivre (2011) feature set was suboptimal, but a few features don't make a very - compelling publication. Still, they're important. + compelling publication. Still, they're important. 3. When I do the dynamic oracle training, I also make the upate cost-sensitive: if the oracle determines that the move the parser took has a cost of N, then the weights for the gold class are incremented by @@ -253,12 +253,10 @@ the classes. In the case of the parser, this means the hash table is accessed 2NKC times, instead of the 2NK times if you have a weights vector. You should also be careful to store the weights contiguously in memory --- you don't want a linked list here. I use a block-sparse format, because my problems tend to -have a few dozen classes. +have a few dozen classes. I guess if I had to summarize my experience, I'd say that the efficiency of these models is really all about the data structures. We want to stay small, and stay contiguous. Minimize redundancy and minimize pointer chasing. That's why Cython is so well suited to this: we get to lay out our data structures, and manage the memory ourselves, with full C-level control. - - diff --git a/docs/source/index.rst b/docs/source/index.rst index bab31eb7b..75892b975 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -65,7 +65,7 @@ it, link it, filter it, categorise it, generate it and correct it. 
spaCy provides a library of utility functions that help programmers build such products. It's commercial open source software: you can either use it under -the AGPL, or you can `buy a commercial license`_ for a one-time fee. +the AGPL, or you can `buy a commercial license`_ for a one-time fee. .. _buy a commercial license: license.html @@ -148,7 +148,7 @@ cosine metric: >>> from numpy import dot >>> from numpy.linalg import norm - + >>> cosine = lambda v1, v2: dot(v1, v2) / (norm(v1) * norm(v2)) >>> words = [w for w in nlp.vocab if w.has_repvec] >>> words.sort(key=lambda w: cosine(w.repvec, pleaded.repvec)) @@ -200,7 +200,7 @@ this: -We wanted to refine the logic so that only adverbs modifying evocative verbs +We wanted to refine the logic so that only adverbs modifying evocative verbs of communication, like "pleaded", were highlighted. We've now built a vector that represents that type of word, so now we can highlight adverbs based on very subtle logic, honing in on adverbs that seem the most stylistically @@ -213,7 +213,7 @@ problematic, given our starting assumptions: >>> from spacy.parts_of_speech import ADV, VERB >>> cosine = lambda v1, v2: dot(v1, v2) / (norm(v1) * norm(v2)) >>> def is_bad_adverb(token, target_verb, tol): - ... if token.pos != ADV + ... if token.pos != ADV ... return False ... elif token.head.pos != VERB: ... return False @@ -238,11 +238,11 @@ database, and processed with an NLP library, to one of three levels of detail --- tokenization, tagging, or parsing. The tasks are additive: to parse the text you have to tokenize and tag it. The pre-processing was not subtracted from the times --- I report the time required for the pipeline to complete. -I report mean times per document, in milliseconds. +I report mean times per document, in milliseconds. **Hardware**: Intel i7-3770 (2012) -.. table:: Efficiency comparison. Lower is better. +.. table:: Efficiency comparison. Lower is better. +--------------+---------------------------+--------------------------------+ | | Absolute (ms per doc) | Relative (to spaCy) | @@ -287,7 +287,7 @@ representations. .. spaCy is based on science, not alchemy. It's open source, and I am happy to clarify any detail of the algorithms I've implemented. It's evaluated against the current best published systems, following the standard - methodologies. These evaluations show that it performs extremely well. + methodologies. These evaluations show that it performs extremely well. Accuracy Comparison ------------------- @@ -299,7 +299,7 @@ Accuracy Comparison +--------------+----------+------------+ | spaCy | 97.2 | 92.4 | +--------------+----------+------------+ - | CoreNLP | 96.9 | 92.2 | + | CoreNLP | 96.9 | 92.2 | +--------------+----------+------------+ | ZPar | 97.3 | 92.9 | +--------------+----------+------------+ @@ -329,5 +329,5 @@ previous fastest parser that I'm aware of. quickstart.rst api.rst howworks.rst - license.rst + license.rst updates.rst diff --git a/docs/source/lexrank_tutorial.rst b/docs/source/lexrank_tutorial.rst index 5f3e472dd..f5c5ae8fd 100644 --- a/docs/source/lexrank_tutorial.rst +++ b/docs/source/lexrank_tutorial.rst @@ -97,7 +97,7 @@ like lead-text take a while to float up the priority list. This strategy also h the advantage of transparency: it's obvious to users how the decision is being made, so nobody is likely to complain about the feature if it works this way. 
-Instead of cutting off the text mid-word, we can tokenize the text, and +Instead of cutting off the text mid-word, we can tokenize the text, and +----------------+-----------+ | System | Rouge-1 R | @@ -116,7 +116,7 @@ A simple bag-of-words model can be created using the `count_by` method, which produces a dictionary of frequencies, keyed by string IDs: .. code:: python - + >>> from spacy.en import English >>> from spacy.en.attrs import SIC >>> nlp = English() @@ -148,7 +148,7 @@ from any token: - + .. _word embeddings: https://colah.github.io/posts/2014-07-NLP-RNNs-Representations/ @@ -196,8 +196,8 @@ undirected --- so, it's natural to represent this as a matrix: from scipy.spatial.distance import cosine import numpy - - + + def lexrank(sent_vectors): n = len(sent_vectors) # Build the cosine similarity matrix @@ -205,7 +205,7 @@ undirected --- so, it's natural to represent this as a matrix: for i in range(n): for j in range(n): matrix[i, j] = cosine(sent_vectors[i], sent_vectors[j]) - # Normalize + # Normalize for i in range(n): matrix[i] /= sum(matrix[i]) return _pagerank(matrix) @@ -278,6 +278,3 @@ sentence represents the document as a whole. Document Model -------------- - - - diff --git a/docs/source/license.rst b/docs/source/license.rst index feb83feab..7dc889586 100644 --- a/docs/source/license.rst +++ b/docs/source/license.rst @@ -13,7 +13,7 @@ I've been writing spaCy for six months now, and I'm very excited to release it. I think it's the most valuable thing I could have built. When I was in academia, I noticed that small companies couldn't really make use of our work. Meanwhile the tech giants have been hiring *everyone*, and putting this stuff -into production. I think spaCy can change that. +into production. I think spaCy can change that. +------------+-----------+----------+-------------------------------------+ @@ -52,14 +52,14 @@ Examples -------- In order to clarify how spaCy's license structure might apply to you, I've -written a few examples, in the form of user-stories. +written a few examples, in the form of user-stories. Ashley and Casey: Seed stage start-up ##################################### Ashley and Casey have an idea for a start-up. To explore their idea, they want to build a minimum viable product they can put in front of potential users and -investors. +investors. They have two options. @@ -75,7 +75,7 @@ They have two options. import a module that imports it, etc). They also cannot use spaCy as a network resource, by running it as a service --- this is the loophole that the "A" part of the AGPL is designed to close. - + Ashley and Casey find the AGPL license unattractive for commercial use. They decide to take up the trial commercial license. However, over the next 90 days, Ashley has to move house twice, and Casey gets diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst index 470df42d7..ec9f612ff 100644 --- a/docs/source/quickstart.rst +++ b/docs/source/quickstart.rst @@ -18,7 +18,7 @@ With Python 2.7 or Python 3, using Linux or OSX, run: .. _300 mb of data: http://s3-us-west-1.amazonaws.com/media.spacynlp.com/en_data_all-0.4.tgz -The download command fetches and installs about 300mb of data, for the +The download command fetches and installs about 300mb of data, for the parser model and word vectors, which it installs within the spacy.en package directory. If you're stuck using a server with an old version of Python, and you don't @@ -88,7 +88,7 @@ the original orthographic form of the word. .. 
py:class:: spacy.en.English(self, data_dir=join(dirname(__file__), 'data')) - .. py:method:: __call__(self, text: unicode, tag=True, parse=True, entity=True, merge_mwes=False) --> Tokens + .. py:method:: __call__(self, text: unicode, tag=True, parse=True, entity=True, merge_mwes=False) --> Tokens +-----------------+--------------+--------------+ | Attribute | Type | Its API | @@ -121,7 +121,7 @@ the original orthographic form of the word. **Get sentence or named entity spans** .. py:attribute:: tokens.Tokens.sents --> Iterator[Span] - + .. py:attribute:: tokens.Tokens.ents --> Iterator[Span] You can iterate over a Span to access individual Tokens, or access its @@ -131,7 +131,7 @@ the original orthographic form of the word. **Embedded word representenations** .. py:attribute:: tokens.Token.repvec - + .. py:attribute:: lexeme.Lexeme.repvec @@ -150,13 +150,13 @@ the original orthographic form of the word. **Align to original string** .. py:attribute:: string: unicode - + Padded with original whitespace. .. py:attribute:: length: int Length, in unicode code-points. Equal to len(self.orth_). - + .. py:attribute:: idx: int Starting offset of word in the original string. @@ -234,4 +234,3 @@ Features +---------+-----------------------------------------------------------+ | prob | Log probability of word, smoothed with Simple Good-Turing | +---------+-----------------------------------------------------------+ - diff --git a/docs/source/updates.rst b/docs/source/updates.rst index 0d2eb0c9c..0b443266a 100644 --- a/docs/source/updates.rst +++ b/docs/source/updates.rst @@ -21,7 +21,7 @@ Bug Fixes all look-ups into the vocabulary failed on wide unicode builds, which further meant that the part-of-speech tagger and parser features were not computed correctly. - + The fix is simple: we already have to read in a list of all the strings, so just store an index into that list, instead of a hash. @@ -36,7 +36,7 @@ Bug Fixes and we want to freely navigate up and down it without creating reference cycles that inhibit garbage collection, and without doing a lot of copying, creating and deleting. - + I think I've got a promising solution to this, but I suspect there's currently a memory leak. Please get in touch no the tracker if you want to know more, especially if you think you can help. @@ -60,7 +60,7 @@ Most English parsing research is performed on text with perfect pre-processing: one newline between every sentence, one space between every token. It's always been done this way, and it's good. It's a useful idealisation, because the pre-processing has few algorithmic implications. - + But, for practical performance, this stuff can matter a lot. Dridan and Oepen (2013) did a simple but rare thing: they actually ran a few parsers on raw text. 
Even on the standard Wall Street Journal corpus, diff --git a/lang_data/en/morphs.json b/lang_data/en/morphs.json index fe361654a..41fda9aa7 100644 --- a/lang_data/en/morphs.json +++ b/lang_data/en/morphs.json @@ -1,7 +1,7 @@ { "PRP": { "I": {"L": "-PRON-", "person": 1, "number": 1, "gender": 0, "case": 1}, - "me": {"L": "-PRON-", "person": 1, "number": 1, "gender": 0, "case": 3}, + "me": {"L": "-PRON-", "person": 1, "number": 1, "gender": 0, "case": 3}, "mine": {"L": "-PRON-", "person": 1, "number": 1, "gender": 0, "case": 2}, "myself": {"L": "-PRON-", "person": 1, "number": 1, "gender": 0, "case": 4}, "you": {"L": "-PRON-", "person": 2, "number": 0, "gender": 0, "case": 0}, diff --git a/setup.py b/setup.py index c21360a97..9857ad955 100644 --- a/setup.py +++ b/setup.py @@ -150,7 +150,7 @@ MOD_NAMES = ['spacy.parts_of_speech', 'spacy.strings', 'spacy.morphology', 'spacy._ml', 'spacy.tokenizer', 'spacy.en.attrs', 'spacy.en.pos', 'spacy.syntax.parser', 'spacy.syntax._state', - 'spacy.syntax.transition_system', + 'spacy.syntax.transition_system', 'spacy.syntax.arc_eager', 'spacy.syntax._parse_features', 'spacy.syntax.conll', 'spacy.orth', 'spacy.syntax.ner'] diff --git a/spacy/_ml.pxd b/spacy/_ml.pxd index d23bd5b19..4b111217e 100644 --- a/spacy/_ml.pxd +++ b/spacy/_ml.pxd @@ -33,7 +33,7 @@ cdef class Model: cdef class HastyModel: cdef Pool mem cdef weight_t* _scores - + cdef const weight_t* score(self, atom_t* context) except NULL cdef int update(self, atom_t* context, class_t guess, class_t gold, int cost) except -1 diff --git a/spacy/attrs.pxd b/spacy/attrs.pxd index d0727d287..f6e35d90b 100644 --- a/spacy/attrs.pxd +++ b/spacy/attrs.pxd @@ -79,5 +79,3 @@ cpdef enum attr_id_t: POS TAG DEP - - diff --git a/spacy/en/__init__.py b/spacy/en/__init__.py index 66d1f705f..b50e2f006 100644 --- a/spacy/en/__init__.py +++ b/spacy/en/__init__.py @@ -129,19 +129,19 @@ class English(object): entity=parse_if_model_present, merge_mwes=False): """Apply the pipeline to some text. The text can span multiple sentences, and can contain arbtrary whitespace. Alignment into the original string - + The tagger and parser are lazy-loaded the first time they are required. Loading the parser model usually takes 5-10 seconds. - + Args: text (unicode): The text to be processed. Keyword args: tag (bool): Whether to add part-of-speech tags to the text. Also sets morphological analysis and lemmas. - + parse (True, False, -1): Whether to add labelled syntactic dependencies. - + -1 (default) is "guess": It will guess True if tag=True and the model has been installed. 
diff --git a/spacy/en/download.py b/spacy/en/download.py index ce0ab343e..1fdf24c0f 100644 --- a/spacy/en/download.py +++ b/spacy/en/download.py @@ -39,7 +39,7 @@ def install_parser_model(url, dest_dir): def install_dep_vectors(url, dest_dir): if not os.path.exists(dest_dir): os.mkdir(dest_dir) - + filename = download_file(url, dest_dir) diff --git a/spacy/en/pos.pxd b/spacy/en/pos.pxd index 22d65cde2..b59481020 100644 --- a/spacy/en/pos.pxd +++ b/spacy/en/pos.pxd @@ -22,4 +22,3 @@ cdef class EnPosTagger: cdef int set_morph(self, const int i, const PosTag* tag, TokenC* tokens) except -1 cdef int lemmatize(self, const univ_pos_t pos, const LexemeC* lex) except -1 - diff --git a/spacy/en/pos.pyx b/spacy/en/pos.pyx index 8b4e0730e..51d07c2c1 100644 --- a/spacy/en/pos.pyx +++ b/spacy/en/pos.pyx @@ -353,7 +353,7 @@ cdef class EnPosTagger: cached.lemma = self.strings[lemma_str] set_morph_from_dict(&cached.morph, props) self._morph_cache.set(pos, orth, cached) - + cdef int fill_context(atom_t* context, const int i, const TokenC* tokens) except -1: _fill_from_token(&context[P2_orth], &tokens[i-2]) @@ -381,4 +381,3 @@ cdef inline void _fill_from_token(atom_t* context, const TokenC* t) nogil: context[7] = 4 else: context[7] = 0 - diff --git a/spacy/lexeme.pxd b/spacy/lexeme.pxd index 41324cd38..87354d532 100644 --- a/spacy/lexeme.pxd +++ b/spacy/lexeme.pxd @@ -12,7 +12,7 @@ cdef LexemeC EMPTY_LEXEME cdef int set_lex_struct_props(LexemeC* lex, dict props, StringStore strings, const float* empty_vec) except -1 - + cdef class Lexeme: cdef readonly ndarray repvec diff --git a/spacy/lexeme.pyx b/spacy/lexeme.pyx index 7a2bce95a..d66161c83 100644 --- a/spacy/lexeme.pyx +++ b/spacy/lexeme.pyx @@ -17,12 +17,12 @@ cdef int set_lex_struct_props(LexemeC* lex, dict props, StringStore string_store const float* empty_vec) except -1: lex.length = props['length'] lex.orth = string_store[props['orth']] - lex.lower = string_store[props['lower']] - lex.norm = string_store[props['norm']] - lex.shape = string_store[props['shape']] + lex.lower = string_store[props['lower']] + lex.norm = string_store[props['norm']] + lex.shape = string_store[props['shape']] lex.prefix = string_store[props['prefix']] lex.suffix = string_store[props['suffix']] - + lex.cluster = props['cluster'] lex.prob = props['prob'] lex.sentiment = props['sentiment'] diff --git a/spacy/ner/_feats.pyx b/spacy/ner/_feats.pyx index 18e073c5b..c1b6e1c35 100644 --- a/spacy/ner/_feats.pyx +++ b/spacy/ner/_feats.pyx @@ -58,10 +58,10 @@ LOCAL = ( (N3.sic,), (P4.sic,), (N4.sic,), - + (P1.sic, N0.sic,), (N0.sic, N1.sic), - + (N0.prefix,), (N0.suffix,), diff --git a/spacy/ner/annot.pyx b/spacy/ner/annot.pyx index d04345319..a1e582e5c 100644 --- a/spacy/ner/annot.pyx +++ b/spacy/ner/annot.pyx @@ -11,7 +11,7 @@ cdef class NERAnnotation: memset(self.starts, -1, sizeof(int) * length) memset(self.ends, -1, sizeof(int) * length) memset(self.labels, -1, sizeof(int) * length) - + cdef int start, end, label for start, end, label in entities: for i in range(start, end): diff --git a/spacy/ner/bilou_moves.pyx b/spacy/ner/bilou_moves.pyx index 42cef3fb7..a73a48135 100644 --- a/spacy/ner/bilou_moves.pyx +++ b/spacy/ner/bilou_moves.pyx @@ -107,7 +107,7 @@ cdef bint is_oracle(ActionType act, int tag, ActionType g_act, int g_tag, # U, Gold L --> False # U, Gold O --> False return False - + cdef int set_accept_if_valid(Move* moves, int n_classes, State* s) except 0: cdef int n_accept = 0 @@ -160,7 +160,7 @@ cdef Move* best_accepted(Move* moves, weight_t* scores, int n) except NULL: 
cdef int best = first_accept cdef weight_t score = scores[first_accept-1] cdef int i - for i in range(first_accept+1, n): + for i in range(first_accept+1, n): if moves[i].accept and scores[i-1] > score: best = i score = scores[i-1] @@ -179,7 +179,7 @@ cdef int transition(State *s, Move* move) except -1: end_entity(s) elif move.action == OUT: pass - s.tags[s.i] = move.clas + s.tags[s.i] = move.clas s.i += 1 diff --git a/spacy/ner/context.pxd b/spacy/ner/context.pxd index f9280c516..433334765 100644 --- a/spacy/ner/context.pxd +++ b/spacy/ner/context.pxd @@ -149,5 +149,3 @@ cpdef enum: cdef int fill_context(atom_t* context, State* s, Tokens tokens) except -1 - - diff --git a/spacy/ner/context.pyx b/spacy/ner/context.pyx index c062bb098..f6beb1501 100644 --- a/spacy/ner/context.pyx +++ b/spacy/ner/context.pyx @@ -18,7 +18,7 @@ cdef int _fill_token(atom_t* c, Lexeme* lex, atom_t pos): c[T_postype] = lex.postype c[T_nertype] = 0 c[T_sensetype] = 0 - + c[T_is_alpha] = lex.flags & (1 << IS_ALPHA) c[T_is_digit] = lex.flags & (1 << IS_DIGIT) c[T_is_lower] = lex.flags & (1 << IS_LOWER) diff --git a/spacy/ner/feats.pyx b/spacy/ner/feats.pyx index 60910f235..b1657716e 100644 --- a/spacy/ner/feats.pyx +++ b/spacy/ner/feats.pyx @@ -7,10 +7,10 @@ LOCAL = ( (N1_sic,), (P2_sic,), (N2_sic,), - + (P1_sic, W_sic,), (W_sic, N1_sic), - + (W_prefix,), (W_suffix,), diff --git a/spacy/ner/greedy_parser.pyx b/spacy/ner/greedy_parser.pyx index 5825c7539..94d096529 100644 --- a/spacy/ner/greedy_parser.pyx +++ b/spacy/ner/greedy_parser.pyx @@ -92,7 +92,7 @@ cdef class NERParser: fill_context(self._context, s, tokens) self.extractor.extract(self._feats, self._values, self._context, NULL) self.model.score(self._scores, self._feats, self._values) - + set_accept_if_valid(self._moves, self.n_classes, s) guess = best_accepted(self._moves, self._scores, self.n_classes) assert guess.clas != 0 diff --git a/spacy/ner/io_moves.pxd b/spacy/ner/io_moves.pxd index 97f9512e8..50f6be106 100644 --- a/spacy/ner/io_moves.pxd +++ b/spacy/ner/io_moves.pxd @@ -16,7 +16,7 @@ cpdef enum ActionType: cdef int set_accept_if_oracle(Move* moves, int n, State* s, int* g_starts, int* g_ends, int* g_labels) except 0 - + cdef int set_accept_if_valid(Move* moves, int n, State* s) except 0 cdef Move* best_accepted(Move* moves, weight_t* scores, int n) except NULL diff --git a/spacy/ner/io_moves.pyx b/spacy/ner/io_moves.pyx index dc268e4a5..257a18f3c 100644 --- a/spacy/ner/io_moves.pyx +++ b/spacy/ner/io_moves.pyx @@ -97,7 +97,7 @@ cdef Move* best_accepted(Move* moves, weight_t* scores, int n) except NULL: cdef int best = first_accept cdef weight_t score = scores[first_accept-1] cdef int i - for i in range(first_accept+1, n): + for i in range(first_accept+1, n): if moves[i].accept and scores[i-1] > score: best = i score = scores[i-1] @@ -105,7 +105,7 @@ cdef Move* best_accepted(Move* moves, weight_t* scores, int n) except NULL: cdef int transition(State *s, Move* move) except -1: - s.tags[s.i] = move.clas + s.tags[s.i] = move.clas if move.action == OUT: s.i += 1 elif move.action == SHIFT: diff --git a/spacy/ner/pystate.pxd b/spacy/ner/pystate.pxd index 9293fae01..6710d9f40 100644 --- a/spacy/ner/pystate.pxd +++ b/spacy/ner/pystate.pxd @@ -8,7 +8,7 @@ cdef class PyState: cdef readonly list tag_names cdef readonly int n_classes cdef readonly dict moves_by_name - + cdef Move* _moves cdef Move* _golds cdef State* _s diff --git a/spacy/scorer.py b/spacy/scorer.py index 684a9476f..a15d5564e 100644 --- a/spacy/scorer.py +++ b/spacy/scorer.py @@ -33,7 +33,7 @@ 
class Scorer(object): @property def ents_r(self): return (self.ents_tp / (self.ents_tp + self.ents_fn + 1e-100)) * 100 - + @property def ents_f(self): return (2 * self.ents_p * self.ents_r) / (self.ents_p + self.ents_r + 1e-100) diff --git a/spacy/spans.pxd b/spacy/spans.pxd index 94b0cde98..180a991ee 100644 --- a/spacy/spans.pxd +++ b/spacy/spans.pxd @@ -5,7 +5,7 @@ from .structs cimport Morphology, TokenC, LexemeC from .vocab cimport Vocab from .strings cimport StringStore - + cdef class Span: cdef readonly Tokens _seq cdef public int i @@ -15,4 +15,3 @@ cdef class Span: cdef public Span head cdef public list rights cdef public list lefts - diff --git a/spacy/structs.pxd b/spacy/structs.pxd index c1fc13ecd..4892aa7b9 100644 --- a/spacy/structs.pxd +++ b/spacy/structs.pxd @@ -8,7 +8,7 @@ cdef struct LexemeC: const float* repvec flags_t flags - + attr_t id attr_t length @@ -18,7 +18,7 @@ cdef struct LexemeC: attr_t shape attr_t prefix attr_t suffix - + attr_t cluster float prob diff --git a/spacy/syntax/_parse_features.pxd b/spacy/syntax/_parse_features.pxd index 1801d1738..0a5965671 100644 --- a/spacy/syntax/_parse_features.pxd +++ b/spacy/syntax/_parse_features.pxd @@ -99,7 +99,7 @@ cpdef enum: S0_shape S0_ne_iob S0_ne_type - + S0r2w S0r2W S0r2p @@ -164,7 +164,7 @@ cpdef enum: N0_shape N0_ne_iob N0_ne_type - + N1w N1W N1p @@ -190,7 +190,7 @@ cpdef enum: N2_shape N2_ne_iob N2_ne_type - + P1w P1W P1p @@ -203,7 +203,7 @@ cpdef enum: P1_shape P1_ne_iob P1_ne_type - + P2w P2W P2p @@ -216,7 +216,7 @@ cpdef enum: P2_shape P2_ne_iob P2_ne_type - + E0w E0W E0p @@ -229,7 +229,7 @@ cpdef enum: E0_shape E0_ne_iob E0_ne_type - + E1w E1W E1p @@ -242,7 +242,7 @@ cpdef enum: E1_shape E1_ne_iob E1_ne_type - + # Misc features at the end dist N0lv diff --git a/spacy/syntax/_parse_features.pyx b/spacy/syntax/_parse_features.pyx index 5be8ce2ca..381093d17 100644 --- a/spacy/syntax/_parse_features.pyx +++ b/spacy/syntax/_parse_features.pyx @@ -111,10 +111,10 @@ ner = ( (N1W,), (P2W,), (N2W,), - + (P1W, N0W,), (N0W, N1W), - + (N0_prefix,), (N0_suffix,), @@ -205,22 +205,22 @@ ner = ( unigrams = ( (S2W, S2p), (S2c6, S2p), - + (S1W, S1p), (S1c6, S1p), (S0W, S0p), (S0c6, S0p), - + (N0W, N0p), (N0p,), (N0c,), (N0c6, N0p), (N0L,), - + (N1W, N1p), (N1c6, N1p), - + (N2W, N2p), (N2c6, N2p), @@ -316,7 +316,7 @@ trigrams = ( (S0p, S0lp, N0p), (S0p, N0p, N0lp), (N0p, N0lp, N0l2p), - + (S0W, S0p, S0rL, S0r2L), (S0p, S0rL, S0r2L), diff --git a/spacy/syntax/_state.pyx b/spacy/syntax/_state.pyx index 12295905b..37b2fb30e 100644 --- a/spacy/syntax/_state.pyx +++ b/spacy/syntax/_state.pyx @@ -27,7 +27,7 @@ cdef int pop_stack(State *s) except -1: s.stack -= 1 if s.stack_len == 0 and not at_eol(s): push_stack(s) - + cdef int push_stack(State *s) except -1: assert s.i < s.sent_len diff --git a/spacy/syntax/arc_eager.pxd b/spacy/syntax/arc_eager.pxd index 119e07402..606629c66 100644 --- a/spacy/syntax/arc_eager.pxd +++ b/spacy/syntax/arc_eager.pxd @@ -3,7 +3,7 @@ from cymem.cymem cimport Pool from thinc.typedefs cimport weight_t -from ._state cimport State +from ._state cimport State from .transition_system cimport TransitionSystem, Transition diff --git a/spacy/syntax/ner.pyx b/spacy/syntax/ner.pyx index 8622d7894..29cead45f 100644 --- a/spacy/syntax/ner.pyx +++ b/spacy/syntax/ner.pyx @@ -280,5 +280,3 @@ class OracleError(Exception): class UnknownMove(Exception): pass - - diff --git a/spacy/syntax/parser.pyx b/spacy/syntax/parser.pyx index ab9de48b8..7d34de8b6 100644 --- a/spacy/syntax/parser.pyx +++ b/spacy/syntax/parser.pyx @@ 
-36,7 +36,7 @@ from . import _parse_features from ._parse_features cimport fill_context, CONTEXT_SIZE -DEBUG = False +DEBUG = False def set_debug(val): global DEBUG DEBUG = val @@ -112,7 +112,7 @@ cdef class GreedyParser: scores = self.model.score(context) guess = self.moves.best_valid(scores, state) best = self.moves.best_gold(scores, state, gold) - + cost = guess.get_cost(&guess, state, gold) self.model.update(context, guess.clas, best.clas, cost) diff --git a/spacy/syntax/transition_system.pxd b/spacy/syntax/transition_system.pxd index 58aa90d99..d3fa0f2a4 100644 --- a/spacy/syntax/transition_system.pxd +++ b/spacy/syntax/transition_system.pxd @@ -33,16 +33,16 @@ cdef class TransitionSystem: cdef int first_state(self, State* state) except -1 cdef int preprocess_gold(self, GoldParse gold) except -1 - + cdef Transition lookup_transition(self, object name) except * - + cdef Transition init_transition(self, int clas, int move, int label) except * cdef Transition best_valid(self, const weight_t* scores, const State* state) except * cdef Transition best_gold(self, const weight_t* scores, const State* state, GoldParse gold) except * - + #cdef class PyState: # """Provide a Python class for testing purposes.""" diff --git a/spacy/syntax/util.py b/spacy/syntax/util.py index 3ba770ae2..64b259b6b 100644 --- a/spacy/syntax/util.py +++ b/spacy/syntax/util.py @@ -13,5 +13,3 @@ class Config(object): @classmethod def read(cls, model_dir, name): return cls(**json.load(open(path.join(model_dir, '%s.json' % name)))) - - diff --git a/spacy/tokenizer.pyx b/spacy/tokenizer.pyx index f20367b08..7a1231a07 100644 --- a/spacy/tokenizer.pyx +++ b/spacy/tokenizer.pyx @@ -60,7 +60,7 @@ cdef class Tokenizer: split off a suffix, and repeat. Args: - string (unicode): The string to be tokenized. + string (unicode): The string to be tokenized. Returns: tokens (Tokens): A Tokens object, giving access to a sequence of LexemeCs. @@ -213,7 +213,7 @@ cdef class Tokenizer: cdef unicode string = chars[:length] match = self._infix_re.search(string) return match.start() if match is not None else 0 - + cdef int _find_prefix(self, Py_UNICODE* chars, size_t length) except -1: cdef unicode string = chars[:length] match = self._prefix_re.search(string) diff --git a/spacy/tokens.pxd b/spacy/tokens.pxd index 2038020bb..9ddd126a1 100644 --- a/spacy/tokens.pxd +++ b/spacy/tokens.pxd @@ -31,9 +31,9 @@ cdef inline bint check_flag(const LexemeC* lexeme, attr_id_t flag_id) nogil: cdef class Tokens: cdef Pool mem cdef Vocab vocab - + cdef TokenC* data - + cdef list _py_tokens cdef unicode _string @@ -61,7 +61,7 @@ cdef class Token: cdef int array_len cdef bint _owns_c_data - + cdef Tokens _seq @staticmethod diff --git a/spacy/tokens.pyx b/spacy/tokens.pyx index 27d99a045..feed37a48 100644 --- a/spacy/tokens.pyx +++ b/spacy/tokens.pyx @@ -104,10 +104,10 @@ cdef class Tokens: def __getitem__(self, object i): """Retrieve a token. - + The Python Token objects are created lazily from internal C data, and cached in _py_tokens - + Returns: token (Token): """ @@ -180,7 +180,7 @@ cdef class Tokens: yield Span(self, start, i+1) start = None if start is not None: - yield Span(self, start, self.length) + yield Span(self, start, self.length) cdef int push_back(self, int idx, LexemeOrToken lex_or_tok) except -1: if self.length == self.max_length: @@ -298,7 +298,7 @@ cdef class Tokens: # What to do about morphology?? # TODO: token.morph = ??? 
token.tag = self.vocab.strings[tag] - token.lemma = self.vocab.strings[lemma] + token.lemma = self.vocab.strings[lemma] if ent_type == 'O': token.ent_iob = 2 token.ent_type = 0 @@ -355,7 +355,7 @@ cdef class Tokens: self._py_tokens = [None] * self.length # Return the merged Python object return self[start] - + cdef class Token: """An individual token --- i.e. a word, a punctuation symbol, etc. Created @@ -608,4 +608,3 @@ _parse_unset_error = """Text has not been parsed, so cannot be accessed. Check that the parser data is installed. Run "python -m spacy.en.download" if not. Check whether parse=False in the call to English.__call__ """ - diff --git a/spacy/typedefs.pxd b/spacy/typedefs.pxd index 3eefab27d..c2c50dbcc 100644 --- a/spacy/typedefs.pxd +++ b/spacy/typedefs.pxd @@ -94,5 +94,3 @@ ctypedef uint64_t flags_t ctypedef uint32_t id_t ctypedef uint16_t len_t ctypedef uint16_t tag_t - - diff --git a/spacy/util.py b/spacy/util.py index 0f1c99979..1d48ab7e9 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -71,7 +71,7 @@ def read_detoken_rules(lang): for line in file_: entries.append(line.strip()) return entries - + def align_tokens(ref, indices): start = 0 @@ -87,7 +87,7 @@ def align_tokens(ref, indices): def detokenize(token_rules, words): - """To align with treebanks, return a list of "chunks", where a chunk is a + """To align with treebanks, return a list of "chunks", where a chunk is a sequence of tokens that are separated by whitespace in actual strings. Each chunk should be a tuple of token indices, e.g. diff --git a/spacy/vocab.pxd b/spacy/vocab.pxd index 25d62cffe..092bedda7 100644 --- a/spacy/vocab.pxd +++ b/spacy/vocab.pxd @@ -31,6 +31,5 @@ cdef class Vocab: cdef const LexemeC* get(self, Pool mem, UniStr* s) except NULL cdef int _add_lex_to_vocab(self, hash_t key, const LexemeC* lex) except -1 - + cdef PreshMap _map - diff --git a/spacy/vocab.pyx b/spacy/vocab.pyx index 8ed9805a0..feb609c0e 100644 --- a/spacy/vocab.pyx +++ b/spacy/vocab.pyx @@ -170,7 +170,7 @@ cdef class Vocab: self.lexemes[lexeme.id] = lexeme i += 1 fclose(fp) - + def load_rep_vectors(self, loc): file_ = _CFile(loc, b'rb') cdef int32_t word_len @@ -187,7 +187,7 @@ cdef class Vocab: except IOError: break file_.read(&vec_len, sizeof(vec_len), 1) - + mem = Address(word_len, sizeof(char)) chars = mem.ptr vec = self.mem.alloc(vec_len, sizeof(float)) diff --git a/tests/sun.tokens b/tests/sun.tokens index d16fa1eae..4b912e18e 100644 --- a/tests/sun.tokens +++ b/tests/sun.tokens @@ -1,4 +1,4 @@ -The Sun is the star at the center of the Solar System. It is almost perfectly spherical and consists of hot plasma interwoven with magnetic fields. [ 12 ] [ 13 ] It has a diameter of about 1 , 392 , 684 km ( 865 , 374 mi ) , [ 5 ] around 109 times that of Earth , and its mass ( 1.989×1030 kilograms , approximately 330 , 000 times the mass of Earth ) accounts for about 99.86 % of the total mass of the Solar System. [ 14 ] Chemically , about three quarters of the Sun 's mass consists of hydrogen , while the rest is mostly helium. The remaining 1.69 % ( equal to 5 , 600 times the mass of Earth ) consists of heavier elements , including oxygen , carbon , neon and iron , among others. [ 15 ] +The Sun is the star at the center of the Solar System. It is almost perfectly spherical and consists of hot plasma interwoven with magnetic fields. 
[ 12 ] [ 13 ] It has a diameter of about 1 , 392 , 684 km ( 865 , 374 mi ) , [ 5 ] around 109 times that of Earth , and its mass ( 1.989×1030 kilograms , approximately 330 , 000 times the mass of Earth ) accounts for about 99.86 % of the total mass of the Solar System. [ 14 ] Chemically , about three quarters of the Sun 's mass consists of hydrogen , while the rest is mostly helium. The remaining 1.69 % ( equal to 5 , 600 times the mass of Earth ) consists of heavier elements , including oxygen , carbon , neon and iron , among others. [ 15 ] -The Sun formed about 4.567 billion [ a ] [ 16 ] years ago from the gravitational collapse of a region within a large molecular cloud. Most of the matter gathered in the center , while the rest flattened into an orbiting disk that would become the Solar System. The central mass became increasingly hot and dense , eventually initiating thermonuclear fusion in its core. It is thought that almost all stars form by this process. The Sun is a G-type main-sequence star ( G2V ) based on spectral class and it is informally designated as a yellow dwarf because its visible radiation is most intense in the yellow-green portion of the spectrum , and although it is actually white in color , from the surface of the Earth it may appear yellow because of atmospheric scattering of blue light. [ 17 ] In the spectral class label , G2 indicates its surface temperature , of approximately 5778 K ( 5505 °C ) , and V indicates that the Sun , like most stars , is a main-sequence star , and thus generates its energy by nuclear fusion of hydrogen nuclei into helium. In its core , the Sun fuses about 620 million metric tons of hydrogen each second. [ 18 ] [ 19 ] -Once regarded by astronomers as a small and relatively insignificant star , the Sun is now thought to be brighter than about 85 % of the stars in the Milky Way , most of which are red dwarfs. [ 20 ] [ 21 ] The absolute magnitude of the Sun is +4.83 ; however , as the star closest to Earth , the Sun is by far the brightest object in the sky with an apparent magnitude of −26.74. [ 22 ] [ 23 ] This is about 13 billion times brighter than the next brightest star , Sirius , with an apparent magnitude of −1.46. The Sun 's hot corona continuously expands in space creating the solar wind , a stream of charged particles that extends to the heliopause at roughly 100 astronomical units. The bubble in the interstellar medium formed by the solar wind , the heliosphere , is the largest continuous structure in the Solar System. [ 24 ] [ 25 ] +The Sun formed about 4.567 billion [ a ] [ 16 ] years ago from the gravitational collapse of a region within a large molecular cloud. Most of the matter gathered in the center , while the rest flattened into an orbiting disk that would become the Solar System. The central mass became increasingly hot and dense , eventually initiating thermonuclear fusion in its core. It is thought that almost all stars form by this process. The Sun is a G-type main-sequence star ( G2V ) based on spectral class and it is informally designated as a yellow dwarf because its visible radiation is most intense in the yellow-green portion of the spectrum , and although it is actually white in color , from the surface of the Earth it may appear yellow because of atmospheric scattering of blue light. 
[ 17 ] In the spectral class label , G2 indicates its surface temperature , of approximately 5778 K ( 5505 °C ) , and V indicates that the Sun , like most stars , is a main-sequence star , and thus generates its energy by nuclear fusion of hydrogen nuclei into helium. In its core , the Sun fuses about 620 million metric tons of hydrogen each second. [ 18 ] [ 19 ] +Once regarded by astronomers as a small and relatively insignificant star , the Sun is now thought to be brighter than about 85 % of the stars in the Milky Way , most of which are red dwarfs. [ 20 ] [ 21 ] The absolute magnitude of the Sun is +4.83 ; however , as the star closest to Earth , the Sun is by far the brightest object in the sky with an apparent magnitude of −26.74. [ 22 ] [ 23 ] This is about 13 billion times brighter than the next brightest star , Sirius , with an apparent magnitude of −1.46. The Sun 's hot corona continuously expands in space creating the solar wind , a stream of charged particles that extends to the heliopause at roughly 100 astronomical units. The bubble in the interstellar medium formed by the solar wind , the heliosphere , is the largest continuous structure in the Solar System. [ 24 ] [ 25 ] diff --git a/tests/test_align.py b/tests/test_align.py index 9d817e107..a603c4a74 100644 --- a/tests/test_align.py +++ b/tests/test_align.py @@ -30,6 +30,3 @@ def test_align_continue(): assert aligned[2] == ('re-align', [(5, 7), (7, 8), (8, 13)]) assert aligned[3] == ('and', [(13, 16)]) assert aligned[4] == ('continue', [(16, 24)]) - - - diff --git a/tests/test_array.py b/tests/test_array.py index 7a08fbb8f..b6f0620c5 100644 --- a/tests/test_array.py +++ b/tests/test_array.py @@ -37,5 +37,3 @@ def test_dep(): assert feats_array[1][1] == tokens[1].dep assert feats_array[2][1] == tokens[2].dep assert feats_array[3][1] == tokens[3].dep - - diff --git a/tests/test_docs.py b/tests/test_docs.py index a6a44c154..0c3825d5b 100644 --- a/tests/test_docs.py +++ b/tests/test_docs.py @@ -57,7 +57,7 @@ def test3(): assert sum(o) != 0 from numpy import dot from numpy.linalg import norm - + cosine = lambda v1, v2: dot(v1, v2) / (norm(v1) * norm(v2)) words = [w for w in nlp.vocab if w.check(IS_LOWER) and w.has_repvec] words.sort(key=lambda w: cosine(w.repvec, pleaded.repvec)) diff --git a/tests/test_merge.py b/tests/test_merge.py index 370a334b8..58be79796 100644 --- a/tests/test_merge.py +++ b/tests/test_merge.py @@ -35,4 +35,3 @@ def test_merge_heads(): def test_issue_54(): text = u'Talks given by women had a slightly higher number of questions asked (3.2$\pm$0.2) than talks given by men (2.6$\pm$0.1).' 
tokens = NLU(text, merge_mwes=True) - diff --git a/tests/test_number.py b/tests/test_number.py index f305c981c..2ca840a06 100644 --- a/tests/test_number.py +++ b/tests/test_number.py @@ -33,4 +33,3 @@ def test_word(): def test_not_number(): assert not like_number('dog') assert not like_number(',') - diff --git a/tests/test_parse_navigate.py b/tests/test_parse_navigate.py index 30e257204..402779399 100644 --- a/tests/test_parse_navigate.py +++ b/tests/test_parse_navigate.py @@ -58,4 +58,3 @@ def test_child_consistency(nlp, sun_text): assert not children for head_index, children in rights.items(): assert not children - diff --git a/tests/test_post_punct.py b/tests/test_post_punct.py index f61759609..1d29a6ed6 100644 --- a/tests/test_post_punct.py +++ b/tests/test_post_punct.py @@ -49,4 +49,3 @@ def test_three_same_close(close_puncts, EN): def test_double_end_quote(EN): assert len(EN("Hello''")) == 2 assert len(EN("''")) == 1 - diff --git a/tests/test_string_loading.py b/tests/test_string_loading.py index 86cd4f2a9..9f9fde1f8 100644 --- a/tests/test_string_loading.py +++ b/tests/test_string_loading.py @@ -16,6 +16,3 @@ def test_one(EN): assert tokens[0].orth_ == 'Betty' tokens2 = EN('Betty also bought a pound of butter.') assert tokens2[0].orth_ == 'Betty' - - - diff --git a/tests/test_subtree.py b/tests/test_subtree.py index b25ec233d..0f32105f0 100644 --- a/tests/test_subtree.py +++ b/tests/test_subtree.py @@ -16,4 +16,3 @@ def test_subtrees(): assert len(list(bus.children)) == 1 assert len(list(wheels.subtree)) == 6 - diff --git a/tests/test_token_api.py b/tests/test_token_api.py index eb7e1013b..a9a0c5ecd 100644 --- a/tests/test_token_api.py +++ b/tests/test_token_api.py @@ -35,5 +35,3 @@ def test_single_token_string(): nlp = English() tokens = nlp(u'foobar') assert tokens[0].string == 'foobar' - - diff --git a/tests/test_tokenizer.py b/tests/test_tokenizer.py index 6c3ce3271..f1a132722 100644 --- a/tests/test_tokenizer.py +++ b/tests/test_tokenizer.py @@ -63,15 +63,15 @@ def test_contraction_punct(EN): def test_sample(EN): text = """Tributes pour in for late British Labour Party leader -Tributes poured in from around the world Thursday -to the late Labour Party leader John Smith, who died earlier from a massive +Tributes poured in from around the world Thursday +to the late Labour Party leader John Smith, who died earlier from a massive heart attack aged 55. -In Washington, the US State Department issued a statement regretting "the +In Washington, the US State Department issued a statement regretting "the untimely death" of the rapier-tongued Scottish barrister and parliamentarian. "Mr. Smith, throughout his distinguished""" - + tokens = EN(text) assert len(tokens) > 5 diff --git a/tests/test_whitespace.py b/tests/test_whitespace.py index a3a700235..19a453c51 100644 --- a/tests/test_whitespace.py +++ b/tests/test_whitespace.py @@ -39,5 +39,3 @@ def test_newline_double_space(EN): def test_newline_space_wrap(EN): tokens = EN('hello \n possums') assert len(tokens) == 3 - - diff --git a/tests/tokenizer.sed b/tests/tokenizer.sed index f5f891c49..f39c04178 100644 --- a/tests/tokenizer.sed +++ b/tests/tokenizer.sed @@ -20,7 +20,7 @@ s=\.\.\.= ... =g s=[,;:@#$%&]= & =g # Assume sentence tokenization has been done first, so split FINAL periods -# only. +# only. s=\([^.]\)\([.]\)\([])}>"']*\)[ ]*$=\1 \2\3 =g # however, we may as well split ALL question marks and exclamation points, # since they shouldn't have the abbrev.-marker ambiguity problem
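
The change above is purely mechanical, so it lends itself to scripting. As a rough sketch only (not the command actually used for this commit; the extension list and the assumption that it runs from the repository root are mine), trailing whitespace and trailing blank lines can be stripped with nothing but the Python standard library:

    import os

    EXTS = ('.py', '.pyx', '.pxd', '.rst', '.md', '.json', '.sed', '.tokens')  # assumed extension list

    for root, dirs, files in os.walk('.'):  # assumed: run from the repository root
        dirs[:] = [d for d in dirs if d != '.git']  # don't touch git internals
        for name in files:
            if not name.endswith(EXTS):
                continue
            path = os.path.join(root, name)
            with open(path) as f:
                lines = f.readlines()
            # Strip trailing spaces/tabs, keeping each line's newline
            cleaned = [line.rstrip() + '\n' for line in lines]
            # Drop blank lines at the end of the file
            while cleaned and cleaned[-1] == '\n':
                cleaned.pop()
            if cleaned != lines:
                with open(path, 'w') as f:
                    f.writelines(cleaned)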