From ef5f548fb0b8f4737a41a838c0d1123752e12346 Mon Sep 17 00:00:00 2001
From: Ines Montani
Date: Sun, 21 Jun 2020 22:38:04 +0200
Subject: [PATCH] Tidy up and auto-format

---
 spacy/lang/en/tokenizer_exceptions.py    | 24 +++++++++++++++---------
 spacy/lang/ja/syntax_iterators.py        |  2 +-
 spacy/lang/ja/tag_bigram_map.py          | 11 +----------
 spacy/lang/ja/tag_orth_map.py            | 14 +++-----------
 spacy/lang/ta/examples.py                |  2 +-
 spacy/lang/tokenizer_exceptions.py       |  2 +-
 spacy/tests/lang/ja/test_serialize.py    |  4 ++--
 spacy/tests/lang/ja/test_tokenizer.py    | 16 ++++++++--------
 spacy/tests/package/test_requirements.py |  8 +++++++-
 spacy/tests/test_misc.py                 |  3 ---
 10 files changed, 39 insertions(+), 47 deletions(-)

diff --git a/spacy/lang/en/tokenizer_exceptions.py b/spacy/lang/en/tokenizer_exceptions.py
index e024dd483..dc8a5c04d 100644
--- a/spacy/lang/en/tokenizer_exceptions.py
+++ b/spacy/lang/en/tokenizer_exceptions.py
@@ -136,7 +136,19 @@ for pron in ["he", "she", "it"]:
 
 # W-words, relative pronouns, prepositions etc.
 
-for word in ["who", "what", "when", "where", "why", "how", "there", "that", "this", "these", "those"]:
+for word in [
+    "who",
+    "what",
+    "when",
+    "where",
+    "why",
+    "how",
+    "there",
+    "that",
+    "this",
+    "these",
+    "those",
+]:
     for orth in [word, word.title()]:
         _exc[orth + "'s"] = [
             {ORTH: orth, LEMMA: word, NORM: word},
@@ -396,14 +408,8 @@ _other_exc = {
         {ORTH: "Let", LEMMA: "let", NORM: "let"},
         {ORTH: "'s", LEMMA: PRON_LEMMA, NORM: "us"},
     ],
-    "c'mon": [
-        {ORTH: "c'm", NORM: "come", LEMMA: "come"},
-        {ORTH: "on"}
-    ],
-    "C'mon": [
-        {ORTH: "C'm", NORM: "come", LEMMA: "come"},
-        {ORTH: "on"}
-    ]
+    "c'mon": [{ORTH: "c'm", NORM: "come", LEMMA: "come"}, {ORTH: "on"}],
+    "C'mon": [{ORTH: "C'm", NORM: "come", LEMMA: "come"}, {ORTH: "on"}],
 }
 
 _exc.update(_other_exc)
diff --git a/spacy/lang/ja/syntax_iterators.py b/spacy/lang/ja/syntax_iterators.py
index 3f6e4bfa3..bb0554cf9 100644
--- a/spacy/lang/ja/syntax_iterators.py
+++ b/spacy/lang/ja/syntax_iterators.py
@@ -24,7 +24,7 @@ def noun_chunks(obj):
     doc = obj.doc  # Ensure works on both Doc and Span.
     np_deps = [doc.vocab.strings.add(label) for label in labels]
-    conj = doc.vocab.strings.add("conj")
+    doc.vocab.strings.add("conj")
     np_label = doc.vocab.strings.add("NP")
     seen = set()
     for i, word in enumerate(obj):
diff --git a/spacy/lang/ja/tag_bigram_map.py b/spacy/lang/ja/tag_bigram_map.py
index 5ed9aec89..9d15fc520 100644
--- a/spacy/lang/ja/tag_bigram_map.py
+++ b/spacy/lang/ja/tag_bigram_map.py
@@ -1,21 +1,15 @@
-# encoding: utf8
-from __future__ import unicode_literals
-
-from ...symbols import POS, ADJ, AUX, NOUN, PART, VERB
+from ...symbols import ADJ, AUX, NOUN, PART, VERB
 
 # mapping from tag bi-gram to pos of previous token
 TAG_BIGRAM_MAP = {
     # This covers only small part of AUX.
     ("形容詞-非自立可能", "助詞-終助詞"): (AUX, None),
-
     ("名詞-普通名詞-形状詞可能", "助動詞"): (ADJ, None),
     # ("副詞", "名詞-普通名詞-形状詞可能"): (None, ADJ),
-
     # This covers acl, advcl, obl and root, but has side effect for compound.
     ("名詞-普通名詞-サ変可能", "動詞-非自立可能"): (VERB, AUX),
     # This covers almost all of the deps
     ("名詞-普通名詞-サ変形状詞可能", "動詞-非自立可能"): (VERB, AUX),
-
     ("名詞-普通名詞-副詞可能", "動詞-非自立可能"): (None, VERB),
     ("副詞", "動詞-非自立可能"): (None, VERB),
     ("形容詞-一般", "動詞-非自立可能"): (None, VERB),
@@ -25,12 +19,9 @@ TAG_BIGRAM_MAP = {
     ("助詞-副助詞", "動詞-非自立可能"): (None, VERB),
     ("助詞-格助詞", "動詞-非自立可能"): (None, VERB),
     ("補助記号-読点", "動詞-非自立可能"): (None, VERB),
-
     ("形容詞-一般", "接尾辞-名詞的-一般"): (None, PART),
-
     ("助詞-格助詞", "形状詞-助動詞語幹"): (None, NOUN),
     ("連体詞", "形状詞-助動詞語幹"): (None, NOUN),
-
     ("動詞-一般", "助詞-副助詞"): (None, PART),
     ("動詞-非自立可能", "助詞-副助詞"): (None, PART),
     ("助動詞", "助詞-副助詞"): (None, PART),
diff --git a/spacy/lang/ja/tag_orth_map.py b/spacy/lang/ja/tag_orth_map.py
index 355cc655b..9d32cdea7 100644
--- a/spacy/lang/ja/tag_orth_map.py
+++ b/spacy/lang/ja/tag_orth_map.py
@@ -1,17 +1,9 @@
-# encoding: utf8
-from __future__ import unicode_literals
-
-from ...symbols import POS, ADJ, AUX, DET, PART, PRON, SPACE ,X
+from ...symbols import DET, PART, PRON, SPACE, X
 
 # mapping from tag bi-gram to pos of previous token
 TAG_ORTH_MAP = {
-    "空白": {
-        " ": SPACE,
-        "　": X,
-    },
-    "助詞-副助詞": {
-        "たり": PART,
-    },
+    "空白": {" ": SPACE, "　": X},
+    "助詞-副助詞": {"たり": PART},
     "連体詞": {
         "あの": DET,
         "かの": DET,
diff --git a/spacy/lang/ta/examples.py b/spacy/lang/ta/examples.py
index 245b8ba1a..c3c47e66e 100644
--- a/spacy/lang/ta/examples.py
+++ b/spacy/lang/ta/examples.py
@@ -18,5 +18,5 @@ sentences = [
     "ஆப்பிள் நிறுவனம் யு.கே. தொடக்க நிறுவனத்தை ஒரு லட்சம் கோடிக்கு வாங்கப் பார்க்கிறது",
     "தன்னாட்சி கார்கள் காப்பீட்டு பொறுப்பை உற்பத்தியாளரிடம் மாற்றுகின்றன",
     "நடைபாதை விநியோக ரோபோக்களை தடை செய்வதை சான் பிரான்சிஸ்கோ கருதுகிறது",
-    "லண்டன் ஐக்கிய இராச்சியத்தில் ஒரு பெரிய நகரம்."
+    "லண்டன் ஐக்கிய இராச்சியத்தில் ஒரு பெரிய நகரம்.",
 ]
diff --git a/spacy/lang/tokenizer_exceptions.py b/spacy/lang/tokenizer_exceptions.py
index 28bc51228..f732a9097 100644
--- a/spacy/lang/tokenizer_exceptions.py
+++ b/spacy/lang/tokenizer_exceptions.py
@@ -1,6 +1,6 @@
 import re
 
-from .char_classes import ALPHA_LOWER, ALPHA
+from .char_classes import ALPHA_LOWER
 from ..symbols import ORTH, POS, TAG, LEMMA, SPACE
 
 
diff --git a/spacy/tests/lang/ja/test_serialize.py b/spacy/tests/lang/ja/test_serialize.py
index 9e703e63d..4d4174b03 100644
--- a/spacy/tests/lang/ja/test_serialize.py
+++ b/spacy/tests/lang/ja/test_serialize.py
@@ -7,7 +7,7 @@ def test_ja_tokenizer_serialize(ja_tokenizer):
     nlp = Japanese()
     nlp.tokenizer.from_bytes(tokenizer_bytes)
     assert tokenizer_bytes == nlp.tokenizer.to_bytes()
-    assert nlp.tokenizer.split_mode == None
+    assert nlp.tokenizer.split_mode is None
 
     with make_tempdir() as d:
         file_path = d / "tokenizer"
@@ -15,7 +15,7 @@ def test_ja_tokenizer_serialize(ja_tokenizer):
         nlp = Japanese()
         nlp.tokenizer.from_disk(file_path)
         assert tokenizer_bytes == nlp.tokenizer.to_bytes()
-        assert nlp.tokenizer.split_mode == None
+        assert nlp.tokenizer.split_mode is None
 
     # split mode is (de)serialized correctly
     nlp = Japanese(meta={"tokenizer": {"config": {"split_mode": "B"}}})
diff --git a/spacy/tests/lang/ja/test_tokenizer.py b/spacy/tests/lang/ja/test_tokenizer.py
index ee532cb81..f76a9067a 100644
--- a/spacy/tests/lang/ja/test_tokenizer.py
+++ b/spacy/tests/lang/ja/test_tokenizer.py
@@ -29,10 +29,9 @@ POS_TESTS = [
 ]
 
 SENTENCE_TESTS = [
-    ('あれ。これ。', ['あれ。', 'これ。']),
-    ('「伝染るんです。」という漫画があります。',
-     ['「伝染るんです。」という漫画があります。']),
-    ]
+    ("あれ。これ。", ["あれ。", "これ。"]),
+    ("「伝染るんです。」という漫画があります。", ["「伝染るんです。」という漫画があります。"]),
+]
 # fmt: on
 
 
@@ -48,7 +47,7 @@ def test_ja_tokenizer_tags(ja_tokenizer, text, expected_tags):
     assert tags == expected_tags
 
 
-#XXX This isn't working? Always passes
+# XXX This isn't working? Always passes
@pytest.mark.parametrize("text,expected_pos", POS_TESTS)
 def test_ja_tokenizer_pos(ja_tokenizer, text, expected_pos):
     pos = [token.pos_ for token in ja_tokenizer(text)]
@@ -57,7 +56,7 @@
 
 @pytest.mark.skip(reason="sentence segmentation in tokenizer is buggy")
 @pytest.mark.parametrize("text,expected_sents", SENTENCE_TESTS)
-def test_ja_tokenizer_pos(ja_tokenizer, text, expected_sents):
+def test_ja_tokenizer_sents(ja_tokenizer, text, expected_sents):
     sents = [str(sent) for sent in ja_tokenizer(text).sents]
     assert sents == expected_sents
 
@@ -74,13 +73,14 @@ def test_ja_tokenizer_naughty_strings(ja_tokenizer, text):
     assert tokens.text_with_ws == text
 
 
-@pytest.mark.parametrize("text,len_a,len_b,len_c",
+@pytest.mark.parametrize(
+    "text,len_a,len_b,len_c",
     [
         ("選挙管理委員会", 4, 3, 1),
         ("客室乗務員", 3, 2, 1),
         ("労働者協同組合", 4, 3, 1),
         ("機能性食品", 3, 2, 1),
-    ]
+    ],
 )
 def test_ja_tokenizer_split_modes(ja_tokenizer, text, len_a, len_b, len_c):
     nlp_a = Japanese(meta={"tokenizer": {"config": {"split_mode": "A"}}})
diff --git a/spacy/tests/package/test_requirements.py b/spacy/tests/package/test_requirements.py
index a7c9a3ea4..6cc8fa6a8 100644
--- a/spacy/tests/package/test_requirements.py
+++ b/spacy/tests/package/test_requirements.py
@@ -10,7 +10,13 @@ def test_build_dependencies():
         "mock",
         "flake8",
     ]
-    libs_ignore_setup = ["fugashi", "natto-py", "pythainlp", "sudachipy", "sudachidict_core"]
+    libs_ignore_setup = [
+        "fugashi",
+        "natto-py",
+        "pythainlp",
+        "sudachipy",
+        "sudachidict_core",
+    ]
 
     # check requirements.txt
     req_dict = {}
diff --git a/spacy/tests/test_misc.py b/spacy/tests/test_misc.py
index 5f9e72f79..f6724f632 100644
--- a/spacy/tests/test_misc.py
+++ b/spacy/tests/test_misc.py
@@ -1,7 +1,6 @@
 import pytest
 import os
 import ctypes
-import srsly
 from pathlib import Path
 from spacy.about import __version__ as spacy_version
 from spacy import util
@@ -9,8 +8,6 @@ from spacy import prefer_gpu, require_gpu
 from spacy.ml._precomputable_affine import PrecomputableAffine
 from spacy.ml._precomputable_affine import _backprop_precomputable_affine_padding
 
-from .util import make_tempdir
-
 
 @pytest.fixture
 def is_admin():
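
Note on the Japanese tokenizer changes above: the tests exercise SudachiPy's split mode ("A"/"B"/"C") through the Japanese language class and the split_mode attribute asserted in test_serialize.py. The sketch below is a minimal illustration assuming only what the tests show (the meta-config API of this spaCy development branch, with sudachipy and sudachidict_core installed); the expected token counts are taken from the test_ja_tokenizer_split_modes parametrize data, not verified independently.

    from spacy.lang.ja import Japanese

    # Split mode "A" yields the shortest units, "C" the longest; a tokenizer
    # created without a config keeps split_mode == None (see test_serialize.py).
    nlp_a = Japanese(meta={"tokenizer": {"config": {"split_mode": "A"}}})
    nlp_c = Japanese(meta={"tokenizer": {"config": {"split_mode": "C"}}})

    print(len(nlp_a("選挙管理委員会")))  # expected 4 tokens in mode A per the test data
    print(len(nlp_c("選挙管理委員会")))  # expected 1 token in mode C per the test data
    print(nlp_a.tokenizer.split_mode)   # the attribute the serialization test asserts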