From 3eaeb733427331b98f76412f43676fc1742d4d12 Mon Sep 17 00:00:00 2001
From: Ines Montani
Date: Sun, 9 Aug 2020 22:36:23 +0200
Subject: [PATCH] Tidy up and auto-format

---
 spacy/gold/batchers.py                        | 12 ++++++++----
 spacy/lang/en/lemmatizer.py                   |  2 --
 spacy/language.py                             |  6 +-----
 spacy/pipeline/tok2vec.py                     |  6 ++++--
 spacy/scorer.py                               |  2 +-
 spacy/tests/morphology/test_morph_pickle.py   |  6 ++++--
 spacy/tests/parser/test_ner.py                |  9 +++------
 spacy/tests/pipeline/test_lemmatizer.py       |  3 +--
 spacy/tests/pipeline/test_tagger.py           |  2 --
 spacy/tests/pipeline/test_textcat.py          |  4 +---
 spacy/tests/regression/test_issue1-1000.py    |  2 --
 spacy/tests/regression/test_issue1001-1500.py |  3 +--
 spacy/tests/regression/test_issue1501-2000.py |  3 +--
 spacy/tests/regression/test_issue3501-4000.py |  6 +++++-
 spacy/tests/regression/test_issue4001-4500.py |  6 ++----
 15 files changed, 32 insertions(+), 40 deletions(-)

diff --git a/spacy/gold/batchers.py b/spacy/gold/batchers.py
index c15b88502..ec1f35815 100644
--- a/spacy/gold/batchers.py
+++ b/spacy/gold/batchers.py
@@ -1,4 +1,4 @@
-from typing import Union, Iterator, Iterable, Sequence, TypeVar, List, Callable
+from typing import Union, Iterable, Sequence, TypeVar, List, Callable
 from typing import Optional, Any
 from functools import partial
 import itertools
@@ -20,7 +20,7 @@ def configure_minibatch_by_padded_size(
     get_length: Optional[Callable[[ItemT], int]] = None
 ) -> BatcherT:
     """Create a batcher that uses the `batch_by_padded_size` strategy.
-    
+
     The padded size is defined as the maximum length of sequences within the
     batch multiplied by the number of sequences in the batch.
 
@@ -92,7 +92,7 @@ def minibatch_by_padded_size(
 ) -> Iterable[List[ItemT]]:
     """Minibatch a sequence by the size of padded batches that would result,
     with sequences binned by length within a window.
-    
+
     The padded size is defined as the maximum length of sequences within the
     batch multiplied by the number of sequences in the batch.
 
@@ -123,7 +123,11 @@ def minibatch_by_padded_size(
 
 
 def minibatch_by_words(
-    seqs: Iterable[ItemT], size: Sizing, tolerance=0.2, discard_oversize=False, get_length=len
+    seqs: Iterable[ItemT],
+    size: Sizing,
+    tolerance=0.2,
+    discard_oversize=False,
+    get_length=len,
 ) -> Iterable[List[ItemT]]:
     """Create minibatches of roughly a given number of words. If any examples
     are longer than the specified batch length, they will appear in a batch by
diff --git a/spacy/lang/en/lemmatizer.py b/spacy/lang/en/lemmatizer.py
index b8bef39b9..be389f117 100644
--- a/spacy/lang/en/lemmatizer.py
+++ b/spacy/lang/en/lemmatizer.py
@@ -1,5 +1,3 @@
-from typing import Optional
-
 from ...pipeline import Lemmatizer
 from ...tokens import Token
 
diff --git a/spacy/language.py b/spacy/language.py
index 96661915a..85aac15ef 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -27,7 +27,6 @@ from .lang.tokenizer_exceptions import URL_MATCH, BASE_EXCEPTIONS
 from .lang.punctuation import TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES
 from .lang.punctuation import TOKENIZER_INFIXES
 from .tokens import Doc
-from .lookups import load_lookups
 from .tokenizer import Tokenizer
 from .errors import Errors, Warnings
 from .schemas import ConfigSchema
@@ -1439,10 +1438,7 @@ class Language:
             or lang_cls is not cls
         ):
             raise ValueError(Errors.E943.format(value=type(lang_cls)))
-        nlp = lang_cls(
-            vocab=vocab,
-            create_tokenizer=create_tokenizer,
-        )
+        nlp = lang_cls(vocab=vocab, create_tokenizer=create_tokenizer)
         if after_creation is not None:
             nlp = after_creation(nlp)
         if not isinstance(nlp, cls):
diff --git a/spacy/pipeline/tok2vec.py b/spacy/pipeline/tok2vec.py
index 54b7987ff..c9f0a99e9 100644
--- a/spacy/pipeline/tok2vec.py
+++ b/spacy/pipeline/tok2vec.py
@@ -34,11 +34,11 @@ def make_tok2vec(nlp: Language, name: str, model: Model) -> "Tok2Vec":
 class Tok2Vec(Pipe):
     """Apply a "token-to-vector" model and set its outputs in the doc.tensor
     attribute. This is mostly useful to share a single subnetwork between multiple
-    components, e.g. to have one embedding and CNN network shared between a 
+    components, e.g. to have one embedding and CNN network shared between a
     parser, tagger and NER.
 
     In order to use the `Tok2Vec` predictions, subsequent components should use
-    the `Tok2VecListener` layer as the tok2vec subnetwork of their model. This 
+    the `Tok2VecListener` layer as the tok2vec subnetwork of their model. This
     layer will read data from the `doc.tensor` attribute during prediction.
     During training, the `Tok2Vec` component will save its prediction and backprop
     callback for each batch, so that the subsequent components can backpropagate
@@ -46,6 +46,7 @@ class Tok2Vec(Pipe):
     avoid relying on object identity within the models to achieve the parameter
     sharing.
     """
+
     def __init__(self, vocab: Vocab, model: Model, name: str = "tok2vec") -> None:
         """Initialize a tok2vec component.
 
@@ -239,6 +240,7 @@ class Tok2VecListener(Model):
     from the Tok2Vec component into downstream components, and communicating
     gradients back upstream.
     """
+
     name = "tok2vec-listener"
 
     def __init__(self, upstream_name: str, width: int) -> None:
diff --git a/spacy/scorer.py b/spacy/scorer.py
index 4a81d39d0..d77881ad0 100644
--- a/spacy/scorer.py
+++ b/spacy/scorer.py
@@ -426,7 +426,7 @@ class Scorer:
             f"{attr}_auc_per_type": {k: v.score for k, v in auc_per_type.items()},
         }
         if len(labels) == 2 and not multi_label and positive_label:
-            positive_label_f = results[f"{attr}_f_per_type"][positive_label]['f']
+            positive_label_f = results[f"{attr}_f_per_type"][positive_label]["f"]
             results[f"{attr}_score"] = positive_label_f
             results[f"{attr}_score_desc"] = f"F ({positive_label})"
         elif not multi_label:
diff --git a/spacy/tests/morphology/test_morph_pickle.py b/spacy/tests/morphology/test_morph_pickle.py
index 0758a6c01..d9b0e3476 100644
--- a/spacy/tests/morphology/test_morph_pickle.py
+++ b/spacy/tests/morphology/test_morph_pickle.py
@@ -15,5 +15,7 @@ def morphology():
 def test_morphology_pickle_roundtrip(morphology):
     b = pickle.dumps(morphology)
     reloaded_morphology = pickle.loads(b)
-    assert reloaded_morphology.get(morphology.strings["Feat1=Val1|Feat2=Val2"]) == "Feat1=Val1|Feat2=Val2"
-    assert reloaded_morphology.get(morphology.strings["Feat3=Val3|Feat4=Val4"]) == "Feat3=Val3|Feat4=Val4"
+    feat = reloaded_morphology.get(morphology.strings["Feat1=Val1|Feat2=Val2"])
+    assert feat == "Feat1=Val1|Feat2=Val2"
+    feat = reloaded_morphology.get(morphology.strings["Feat3=Val3|Feat4=Val4"])
+    assert feat == "Feat3=Val3|Feat4=Val4"
diff --git a/spacy/tests/parser/test_ner.py b/spacy/tests/parser/test_ner.py
index 60ba5246f..0ffe74273 100644
--- a/spacy/tests/parser/test_ner.py
+++ b/spacy/tests/parser/test_ner.py
@@ -144,8 +144,7 @@ def test_accept_blocked_token():
     # 1. test normal behaviour
     nlp1 = English()
     doc1 = nlp1("I live in New York")
-    config = {
-    }
+    config = {}
     ner1 = nlp1.create_pipe("ner", config=config)
     assert [token.ent_iob_ for token in doc1] == ["", "", "", "", ""]
     assert [token.ent_type_ for token in doc1] == ["", "", "", "", ""]
@@ -164,8 +163,7 @@ def test_accept_blocked_token():
     # 2. test blocking behaviour
     nlp2 = English()
     doc2 = nlp2("I live in New York")
-    config = {
-    }
+    config = {}
     ner2 = nlp2.create_pipe("ner", config=config)
 
     # set "New York" to a blocked entity
@@ -220,8 +218,7 @@ def test_overwrite_token():
     assert [token.ent_iob_ for token in doc] == ["O", "O", "O", "O", "O"]
     assert [token.ent_type_ for token in doc] == ["", "", "", "", ""]
     # Check that a new ner can overwrite O
-    config = {
-    }
+    config = {}
     ner2 = nlp.create_pipe("ner", config=config)
     ner2.moves.add_action(5, "")
     ner2.add_label("GPE")
diff --git a/spacy/tests/pipeline/test_lemmatizer.py b/spacy/tests/pipeline/test_lemmatizer.py
index 644fa0f01..8a70fdeeb 100644
--- a/spacy/tests/pipeline/test_lemmatizer.py
+++ b/spacy/tests/pipeline/test_lemmatizer.py
@@ -1,8 +1,7 @@
 import pytest
-
 from spacy import util, registry
 from spacy.lang.en import English
-from spacy.lookups import Lookups, load_lookups
+from spacy.lookups import Lookups
 from ..util import make_tempdir
 
 
diff --git a/spacy/tests/pipeline/test_tagger.py b/spacy/tests/pipeline/test_tagger.py
index 5f27a0afa..1af4a5121 100644
--- a/spacy/tests/pipeline/test_tagger.py
+++ b/spacy/tests/pipeline/test_tagger.py
@@ -1,10 +1,8 @@
 import pytest
-
 from spacy import util
 from spacy.gold import Example
 from spacy.lang.en import English
 from spacy.language import Language
-from spacy.symbols import POS, NOUN
 from ..util import make_tempdir
 
 
diff --git a/spacy/tests/pipeline/test_textcat.py b/spacy/tests/pipeline/test_textcat.py
index 363a16a11..17add7391 100644
--- a/spacy/tests/pipeline/test_textcat.py
+++ b/spacy/tests/pipeline/test_textcat.py
@@ -117,9 +117,7 @@ def test_overfitting_IO():
     assert cats2["POSITIVE"] + cats2["NEGATIVE"] == pytest.approx(1.0, 0.1)
 
     # Test scoring
-    scores = nlp.evaluate(
-        train_examples, scorer_cfg={"positive_label": "POSITIVE"}
-    )
+    scores = nlp.evaluate(train_examples, scorer_cfg={"positive_label": "POSITIVE"})
     assert scores["cats_micro_f"] == 1.0
     assert scores["cats_score"] == 1.0
     assert "cats_score_desc" in scores
diff --git a/spacy/tests/regression/test_issue1-1000.py b/spacy/tests/regression/test_issue1-1000.py
index b642ca229..5c93ea3c8 100644
--- a/spacy/tests/regression/test_issue1-1000.py
+++ b/spacy/tests/regression/test_issue1-1000.py
@@ -1,11 +1,9 @@
 import pytest
 import random
-
 from spacy import util
 from spacy.gold import Example
 from spacy.matcher import Matcher
 from spacy.attrs import IS_PUNCT, ORTH, LOWER
-from spacy.symbols import POS, VERB
 from spacy.vocab import Vocab
 from spacy.lang.en import English
 from spacy.lookups import Lookups
diff --git a/spacy/tests/regression/test_issue1001-1500.py b/spacy/tests/regression/test_issue1001-1500.py
index 0ac895546..d6a4600e3 100644
--- a/spacy/tests/regression/test_issue1001-1500.py
+++ b/spacy/tests/regression/test_issue1001-1500.py
@@ -6,8 +6,7 @@ from spacy.lang.en import English
 from spacy.lang.lex_attrs import LEX_ATTRS
 from spacy.matcher import Matcher
 from spacy.tokenizer import Tokenizer
-from spacy.lookups import Lookups
-from spacy.symbols import ORTH, LEMMA, POS, VERB
+from spacy.symbols import ORTH, LEMMA, POS
 
 
 def test_issue1061():
diff --git a/spacy/tests/regression/test_issue1501-2000.py b/spacy/tests/regression/test_issue1501-2000.py
index eee22c93d..4988575ea 100644
--- a/spacy/tests/regression/test_issue1501-2000.py
+++ b/spacy/tests/regression/test_issue1501-2000.py
@@ -271,8 +271,7 @@ def test_issue1963(en_tokenizer):
 @pytest.mark.parametrize("label", ["U-JOB-NAME"])
 def test_issue1967(label):
     nlp = Language()
-    config = {
-    }
+    config = {}
     ner = nlp.create_pipe("ner", config=config)
     example = Example.from_dict(
         Doc(ner.vocab, words=["word"]),
diff --git a/spacy/tests/regression/test_issue3501-4000.py b/spacy/tests/regression/test_issue3501-4000.py
index e42779ad7..de554a5ec 100644
--- a/spacy/tests/regression/test_issue3501-4000.py
+++ b/spacy/tests/regression/test_issue3501-4000.py
@@ -157,7 +157,11 @@ def test_issue3540(en_vocab):
 
     with doc.retokenize() as retokenizer:
         heads = [(doc[3], 1), doc[2]]
-        attrs = {"POS": ["PROPN", "PROPN"], "LEMMA": ["New", "York"], "DEP": ["pobj", "compound"]}
+        attrs = {
+            "POS": ["PROPN", "PROPN"],
+            "LEMMA": ["New", "York"],
+            "DEP": ["pobj", "compound"],
+        }
         retokenizer.split(doc[3], ["New", "York"], heads=heads, attrs=attrs)
 
     gold_text = ["I", "live", "in", "New", "York", "right", "now"]
diff --git a/spacy/tests/regression/test_issue4001-4500.py b/spacy/tests/regression/test_issue4001-4500.py
index ad577cbe5..423015106 100644
--- a/spacy/tests/regression/test_issue4001-4500.py
+++ b/spacy/tests/regression/test_issue4001-4500.py
@@ -138,8 +138,7 @@ def test_issue4042_bug2():
     if not output_dir.exists():
         output_dir.mkdir()
     ner1.to_disk(output_dir)
-    config = {
-    }
+    config = {}
     ner2 = nlp1.create_pipe("ner", config=config)
     ner2.from_disk(output_dir)
     assert len(ner2.labels) == 2
@@ -301,8 +300,7 @@ def test_issue4313():
     beam_width = 16
     beam_density = 0.0001
     nlp = English()
-    config = {
-    }
+    config = {}
     ner = nlp.create_pipe("ner", config=config)
     ner.add_label("SOME_LABEL")
     ner.begin_training([])
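
Usage note (illustration, not part of the patch): the batchers.py docstrings
above define the padded size of a batch as the maximum sequence length within
the batch multiplied by the number of sequences in the batch. A minimal
standalone sketch of that arithmetic in plain Python, with invented sample
data:

    # Padded size as described in the batchers.py docstrings: the longest
    # sequence length in the batch times the number of sequences in it.
    # The batch below is hypothetical sample data, not from the patch.
    def padded_size(batch, get_length=len):
        return max(get_length(seq) for seq in batch) * len(batch)

    batch = [["I", "live", "in", "New", "York"], ["Hi"], ["Good", "morning"]]
    assert padded_size(batch) == 5 * 3  # longest seq has 5 tokens, 3 seqs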