From af253236534a69ccdef1428cb0d8b6b7461e271c Mon Sep 17 00:00:00 2001
From: Ines Montani
Date: Wed, 11 Sep 2019 14:00:36 +0200
Subject: [PATCH] Tidy up and auto-format

---
 spacy/_ml.py                          | 50 ++++++++++++---------
 spacy/cli/train.py                    | 13 +++++--
 spacy/errors.py                       |  1 +
 spacy/lang/de/__init__.py             | 18 +++++++---
 spacy/lang/en/__init__.py             | 12 ++++---
 spacy/lang/en/morph_rules.py          | 48 ++++++++++++-------------
 spacy/lang/en/tokenizer_exceptions.py |  7 +---
 spacy/lemmatizer.py                   | 11 +++---
 spacy/lookups.py                      |  1 +
 spacy/tests/parser/test_ner.py        | 26 ++------------
 10 files changed, 90 insertions(+), 97 deletions(-)

diff --git a/spacy/_ml.py b/spacy/_ml.py
index 97660f8f9..d81ceccc1 100644
--- a/spacy/_ml.py
+++ b/spacy/_ml.py
@@ -348,7 +348,7 @@ def Tok2Vec(width, embed_size, **kwargs):
         if pretrained_vectors is not None:
             glove = StaticVectors(pretrained_vectors, width, column=cols.index(ID))
 
-            if subword_features:
+            if subword_features:
                 embed = uniqued(
                     (glove | norm | prefix | suffix | shape)
                     >> LN(Maxout(width, width * 5, pieces=3)),
@@ -363,14 +363,16 @@
             embed = uniqued(
                 (norm | prefix | suffix | shape)
                 >> LN(Maxout(width, width * 4, pieces=3)),
-                column=cols.index(ORTH)
+                column=cols.index(ORTH),
             )
-        elif char_embed:
+        elif char_embed:
             embed = concatenate_lists(
                 CharacterEmbed(nM=64, nC=8),
-                FeatureExtracter(cols) >> with_flatten(norm)
+                FeatureExtracter(cols) >> with_flatten(norm),
+            )
+            reduce_dimensions = LN(
+                Maxout(width, 64 * 8 + width, pieces=cnn_maxout_pieces)
             )
-            reduce_dimensions = LN(Maxout(width, 64*8+width, pieces=cnn_maxout_pieces))
         else:
             embed = norm
 
@@ -379,22 +381,14 @@
             >> LN(Maxout(width, width * 3, pieces=cnn_maxout_pieces))
         )
         if char_embed:
-            tok2vec = (
-                embed
-                >> with_flatten(
-                    reduce_dimensions
-                    >> convolution ** conv_depth, pad=conv_depth
-                )
+            tok2vec = embed >> with_flatten(
+                reduce_dimensions >> convolution ** conv_depth, pad=conv_depth
             )
         else:
-            tok2vec = (
-                FeatureExtracter(cols)
-                >> with_flatten(
-                    embed
-                    >> convolution ** conv_depth, pad=conv_depth
-                )
+            tok2vec = FeatureExtracter(cols) >> with_flatten(
+                embed >> convolution ** conv_depth, pad=conv_depth
             )
-
+
         if bilstm_depth >= 1:
             tok2vec = tok2vec >> PyTorchBiLSTM(width, width, bilstm_depth)
         # Work around thinc API limitations :(. TODO: Revise in Thinc 7
@@ -611,9 +605,7 @@ def build_morphologizer_model(class_nums, **cfg):
         char_embed=char_embed,
         pretrained_vectors=pretrained_vectors,
     )
-    softmax = with_flatten(
-        MultiSoftmax(class_nums, token_vector_width)
-    )
+    softmax = with_flatten(MultiSoftmax(class_nums, token_vector_width))
    softmax.out_sizes = class_nums
     model = tok2vec >> softmax
     model.nI = None
@@ -906,16 +898,17 @@ def _replace_word(word, random_words, mask="[MASK]"):
 def _uniform_init(lo, hi):
     def wrapped(W, ops):
         copy_array(W, ops.xp.random.uniform(lo, hi, W.shape))
+
     return wrapped
 
 
 @describe.attributes(
     nM=Dimension("Vector dimensions"),
     nC=Dimension("Number of characters per word"),
-    vectors=Synapses("Embed matrix",
-                     lambda obj: (obj.nC, obj.nV, obj.nM),
-                     _uniform_init(-0.1, 0.1)),
-    d_vectors=Gradient("vectors")
+    vectors=Synapses(
+        "Embed matrix", lambda obj: (obj.nC, obj.nV, obj.nM), _uniform_init(-0.1, 0.1)
+    ),
+    d_vectors=Gradient("vectors"),
 )
 class CharacterEmbed(Model):
     def __init__(self, nM=None, nC=None, **kwargs):
@@ -926,12 +919,12 @@ class CharacterEmbed(Model):
     @property
     def nO(self):
         return self.nM * self.nC
-    
+
     @property
     def nV(self):
         return 256
 
-    def begin_update(self, docs, drop=0.):
+    def begin_update(self, docs, drop=0.0):
         if not docs:
             return []
         ids = []
@@ -959,6 +952,7 @@ class CharacterEmbed(Model):
             if sgd is not None:
                 sgd(self._mem.weights, self._mem.gradient, key=self.id)
             return None
+
         return output, backprop_character_embed
 
 
@@ -974,4 +968,4 @@ def get_cossim_loss(yh, y):
     cosine = (yh * y).sum(axis=1, keepdims=True) / mul_norms
     d_yh = (y / mul_norms) - (cosine * (yh / norm_yh ** 2))
     loss = xp.abs(cosine - 1).sum()
-    return loss, -d_yh
\ No newline at end of file
+    return loss, -d_yh
diff --git a/spacy/cli/train.py b/spacy/cli/train.py
index 365e7ea44..8d162362c 100644
--- a/spacy/cli/train.py
+++ b/spacy/cli/train.py
@@ -64,7 +64,12 @@ from .. import about
         str,
     ),
     noise_level=("Amount of corruption for data augmentation", "option", "nl", float),
-    orth_variant_level=("Amount of orthography variation for data augmentation", "option", "ovl", float),
+    orth_variant_level=(
+        "Amount of orthography variation for data augmentation",
+        "option",
+        "ovl",
+        float,
+    ),
     eval_beam_widths=("Beam widths to evaluate, e.g. 4,8", "option", "bw", str),
     gold_preproc=("Use gold preprocessing", "flag", "G", bool),
     learn_tokens=("Make parser learn gold-standard tokenization", "flag", "T", bool),
@@ -245,7 +250,11 @@ def train(
     best_score = 0.0
     for i in range(n_iter):
         train_docs = corpus.train_docs(
-            nlp, noise_level=noise_level, orth_variant_level=orth_variant_level, gold_preproc=gold_preproc, max_length=0
+            nlp,
+            noise_level=noise_level,
+            orth_variant_level=orth_variant_level,
+            gold_preproc=gold_preproc,
+            max_length=0,
         )
         if raw_text:
             random.shuffle(raw_text)
diff --git a/spacy/errors.py b/spacy/errors.py
index c0868800d..b8a8dccba 100644
--- a/spacy/errors.py
+++ b/spacy/errors.py
@@ -456,6 +456,7 @@ class Errors(object):
     E159 = ("Can't find table '{name}' in lookups. Available tables: {tables}")
     E160 = ("Can't find language data file: {path}")
 
+
 @add_codes
 class TempErrors(object):
     T003 = ("Resizing pre-trained Tagger models is not currently supported.")
diff --git a/spacy/lang/de/__init__.py b/spacy/lang/de/__init__.py
index 1ddee54b3..b96069235 100644
--- a/spacy/lang/de/__init__.py
+++ b/spacy/lang/de/__init__.py
@@ -27,10 +27,20 @@ class GermanDefaults(Language.Defaults):
     stop_words = STOP_WORDS
     syntax_iterators = SYNTAX_ITERATORS
     resources = {"lemma_lookup": "lemma_lookup.json"}
-    single_orth_variants = [{"tags": ["$("], "variants": ["…", "..."]},
-                            {"tags": ["$("], "variants": ["-", "—", "–", "--", "---", "——"]}]
-    paired_orth_variants = [{"tags": ["$("], "variants": [("'", "'"), (",", "'"), ("‚", "‘"), ("›", "‹"), ("‹", "›")]},
-                            {"tags": ["$("], "variants": [("``", "''"), ('"', '"'), ("„", "“"), ("»", "«"), ("«", "»")]}]
+    single_orth_variants = [
+        {"tags": ["$("], "variants": ["…", "..."]},
+        {"tags": ["$("], "variants": ["-", "—", "–", "--", "---", "——"]},
+    ]
+    paired_orth_variants = [
+        {
+            "tags": ["$("],
+            "variants": [("'", "'"), (",", "'"), ("‚", "‘"), ("›", "‹"), ("‹", "›")],
+        },
+        {
+            "tags": ["$("],
+            "variants": [("``", "''"), ('"', '"'), ("„", "“"), ("»", "«"), ("«", "»")],
+        },
+    ]
 
 
 class German(Language):
diff --git a/spacy/lang/en/__init__.py b/spacy/lang/en/__init__.py
index 2f391de0b..e4c745c83 100644
--- a/spacy/lang/en/__init__.py
+++ b/spacy/lang/en/__init__.py
@@ -38,10 +38,14 @@ class EnglishDefaults(Language.Defaults):
         "lemma_index": "lemmatizer/lemma_index.json",
         "lemma_exc": "lemmatizer/lemma_exc.json",
     }
-    single_orth_variants = [{"tags": ["NFP"], "variants": ["…", "..."]},
-                            {"tags": [":"], "variants": ["-", "—", "–", "--", "---", "——"]}]
-    paired_orth_variants = [{"tags": ["``", "''"], "variants": [("'", "'"), ("‘", "’")]},
-                            {"tags": ["``", "''"], "variants": [('"', '"'), ("“", "”")]}]
+    single_orth_variants = [
+        {"tags": ["NFP"], "variants": ["…", "..."]},
+        {"tags": [":"], "variants": ["-", "—", "–", "--", "---", "——"]},
+    ]
+    paired_orth_variants = [
+        {"tags": ["``", "''"], "variants": [("'", "'"), ("‘", "’")]},
+        {"tags": ["``", "''"], "variants": [('"', '"'), ("“", "”")]},
+    ]
 
 
 class English(Language):
diff --git a/spacy/lang/en/morph_rules.py b/spacy/lang/en/morph_rules.py
index f910e42b8..5ed4eac59 100644
--- a/spacy/lang/en/morph_rules.py
+++ b/spacy/lang/en/morph_rules.py
@@ -12,50 +12,50 @@ _subordinating_conjunctions = [
     "if",
     "as",
     "because",
-    #"of",
-    #"for",
-    #"before",
-    #"in",
+    # "of",
+    # "for",
+    # "before",
+    # "in",
     "while",
-    #"after",
+    # "after",
     "since",
     "like",
-    #"with",
+    # "with",
     "so",
-    #"to",
-    #"by",
-    #"on",
-    #"about",
+    # "to",
+    # "by",
+    # "on",
+    # "about",
     "than",
     "whether",
     "although",
-    #"from",
+    # "from",
     "though",
-    #"until",
+    # "until",
     "unless",
     "once",
-    #"without",
-    #"at",
-    #"into",
+    # "without",
+    # "at",
+    # "into",
     "cause",
-    #"over",
+    # "over",
     "upon",
     "till",
     "whereas",
-    #"beyond",
+    # "beyond",
     "whilst",
     "except",
     "despite",
     "wether",
-    #"then",
+    # "then",
     "but",
     "becuse",
     "whie",
-    #"below",
-    #"against",
+    # "below",
+    # "against",
     "it",
     "w/out",
-    #"toward",
+    # "toward",
     "albeit",
     "save",
     "besides",
     "w/o",
     "plus",
     "and",
     "even",
     "afer",
     "out",
     "near",
     "seince",
-    #"towards",
+    # "towards",
     "tho",
     "sice",
     "will",
 ]
 
 # This seems kind of wrong too?
-#_relative_pronouns = ["this", "that", "those", "these"]
+# _relative_pronouns = ["this", "that", "those", "these"]
 
 
 MORPH_RULES = {
-    #"DT": {word: {"POS": "PRON"} for word in _relative_pronouns},
+    # "DT": {word: {"POS": "PRON"} for word in _relative_pronouns},
     "IN": {word: {"POS": "SCONJ"} for word in _subordinating_conjunctions},
     "NN": {
         "something": {"POS": "PRON"},
diff --git a/spacy/lang/en/tokenizer_exceptions.py b/spacy/lang/en/tokenizer_exceptions.py
index 91c29c9e0..c45197771 100644
--- a/spacy/lang/en/tokenizer_exceptions.py
+++ b/spacy/lang/en/tokenizer_exceptions.py
@@ -30,12 +30,7 @@ for pron in ["i"]:
     for orth in [pron, pron.title()]:
         _exc[orth + "'m"] = [
             {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"},
-            {
-                ORTH: "'m",
-                LEMMA: "be",
-                NORM: "am",
-                TAG: "VBP",
-            },
+            {ORTH: "'m", LEMMA: "be", NORM: "am", TAG: "VBP"},
         ]
 
         _exc[orth + "m"] = [
diff --git a/spacy/lemmatizer.py b/spacy/lemmatizer.py
index c9ccbcd0d..d14f5292e 100644
--- a/spacy/lemmatizer.py
+++ b/spacy/lemmatizer.py
@@ -2,8 +2,7 @@ from __future__ import unicode_literals
 
 from collections import OrderedDict
 
-from .symbols import POS, NOUN, VERB, ADJ, PUNCT, PROPN
-from .symbols import VerbForm_inf, VerbForm_none, Number_sing, Degree_pos
+from .symbols import NOUN, VERB, ADJ, PUNCT, PROPN
 
 
 class Lemmatizer(object):
@@ -71,13 +70,13 @@ class Lemmatizer(object):
             return True
         elif univ_pos == "adj" and morphology.get("Degree") == "pos":
             return True
-        elif morphology.get('VerbForm') == 'inf':
+        elif morphology.get("VerbForm") == "inf":
             return True
-        elif morphology.get('VerbForm') == 'none':
+        elif morphology.get("VerbForm") == "none":
             return True
-        elif morphology.get('VerbForm') == 'inf':
+        elif morphology.get("VerbForm") == "inf":
             return True
-        elif morphology.get('Degree') == 'pos':
+        elif morphology.get("Degree") == "pos":
             return True
         else:
             return False
diff --git a/spacy/lookups.py b/spacy/lookups.py
index 801b4d00d..741d40330 100644
--- a/spacy/lookups.py
+++ b/spacy/lookups.py
@@ -137,6 +137,7 @@ class Table(OrderedDict):
     """A table in the lookups. Subclass of builtin dict that implements a
     slightly more consistent and unified API.
     """
+
     @classmethod
     def from_dict(cls, data, name=None):
         self = cls(name=name)
diff --git a/spacy/tests/parser/test_ner.py b/spacy/tests/parser/test_ner.py
index c39491ecf..db911dba0 100644
--- a/spacy/tests/parser/test_ner.py
+++ b/spacy/tests/parser/test_ner.py
@@ -119,28 +119,8 @@ def test_oracle_moves_missing_B(en_vocab):
 
 
 def test_oracle_moves_whitespace(en_vocab):
-    words = [
-        "production",
-        "\n",
-        "of",
-        "Northrop",
-        "\n",
-        "Corp.",
-        "\n",
-        "'s",
-        "radar",
-    ]
-    biluo_tags = [
-        "O",
-        "O",
-        "O",
-        "B-ORG",
-        None,
-        "I-ORG",
-        "L-ORG",
-        "O",
-        "O",
-    ]
+    words = ["production", "\n", "of", "Northrop", "\n", "Corp.", "\n", "'s", "radar"]
+    biluo_tags = ["O", "O", "O", "B-ORG", None, "I-ORG", "L-ORG", "O", "O"]
 
     doc = Doc(en_vocab, words=words)
     gold = GoldParse(doc, words=words, entities=biluo_tags)
@@ -156,4 +136,4 @@ def test_oracle_moves_whitespace(en_vocab):
         action, label = tag.split("-")
         moves.add_action(move_types.index(action), label)
     moves.preprocess_gold(gold)
-    seq = moves.get_oracle_sequence(doc, gold)
+    moves.get_oracle_sequence(doc, gold)
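
Notes for reviewers, appended after the diff.

The final spacy/_ml.py hunk only restores the missing trailing newline in
get_cossim_loss, but the hunk context crops the function's opening lines. A
minimal NumPy sketch of the full computation for reference; the norm_yh,
norm_y, and mul_norms definitions are assumptions filled in from the usual
cosine formula, while everything from cosine down mirrors the hunk:

    import numpy as np

    def get_cossim_loss(yh, y):
        # Row-wise cosine similarity between predictions yh and targets y.
        norm_yh = np.linalg.norm(yh, axis=1, keepdims=True)  # assumed, not in hunk
        norm_y = np.linalg.norm(y, axis=1, keepdims=True)    # assumed, not in hunk
        mul_norms = norm_yh * norm_y                         # assumed, not in hunk
        cosine = (yh * y).sum(axis=1, keepdims=True) / mul_norms
        # Derivative of cosine w.r.t. yh; negated on return because the loss
        # sums |cosine - 1|, i.e. it pushes cosine toward 1.
        d_yh = (y / mul_norms) - (cosine * (yh / norm_yh ** 2))
        loss = np.abs(cosine - 1).sum()
        return loss, -d_yh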
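
The reformatted @describe.attributes block on CharacterEmbed keeps the same
shapes; the arithmetic can be pieced together from the hunks themselves
(CharacterEmbed(nM=64, nC=8) in Tok2Vec, nV fixed at 256, nO = nM * nC):

    # Shape arithmetic implied by the CharacterEmbed hunks above.
    nM, nC, nV = 64, 8, 256        # vector dims, chars per word, byte vocab
    vectors_shape = (nC, nV, nM)   # the Synapses("Embed matrix", ...) shape
    nO = nM * nC                   # 512-dim output vector per word
    assert nO == 64 * 8            # matches Maxout(width, 64 * 8 + width, ...)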
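
The reformatted single_orth_variants and paired_orth_variants tables in
spacy/lang/de and spacy/lang/en feed the orth_variant_level augmentation
option ("ovl") added to spacy/cli/train.py above. Roughly how a single-token
entry can be read, shown with a hypothetical helper; swap_variant is
illustrative only, not spaCy API, and the real substitution happens in the
gold-standard training code:

    import random

    # Copied from the English hunk above.
    single_orth_variants = [
        {"tags": ["NFP"], "variants": ["…", "..."]},
        {"tags": [":"], "variants": ["-", "—", "–", "--", "---", "——"]},
    ]

    def swap_variant(text, tag, level):
        """Hypothetical: swap a token for a random equivalent variant."""
        for entry in single_orth_variants:
            if tag in entry["tags"] and text in entry["variants"]:
                if random.random() < level:
                    return random.choice(entry["variants"])
        return text

    print(swap_variant("...", "NFP", level=1.0))  # prints "…" or "..."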