Tidy up with flake8: imports, comparisons, etc.

Adriane Boyd 2021-06-28 12:03:29 +02:00
parent 4d1ef8f695
commit 86d01e9229
23 changed files with 28 additions and 41 deletions
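
Most of the changes below follow a handful of recurring flake8 fixes: dropping unused imports, replacing equality comparisons against singletons with identity checks, rewriting negated membership tests, and comparing string literals with == rather than is. A rough, hypothetical sketch of the before/after patterns (illustrative only, not spaCy code):

# F401: imported but unused -- the fix is simply to delete the import.

# E712: comparisons to True/False should use "is" (or rely on truthiness).
flag = True
assert flag is True                  # instead of: assert flag == True

# E713: negated membership reads as "not in".
components = ["ner", "parser"]
assert "tagger" not in components    # instead of: assert not "tagger" in components

# F632: string literals are compared by value, not identity.
tag = "V"
assert tag == "V"                    # instead of: assert tag is "V"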


@@ -111,7 +111,7 @@ universal = false
formats = gztar
[flake8]
ignore = E203, E266, E501, E731, W503, E741
ignore = E203, E266, E501, E731, W503, E741, F541
max-line-length = 80
select = B,C,E,F,W,T4,B9
exclude =
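
The only configuration change is adding F541 to the ignore list; as far as I recall, F541 flags f-strings that contain no placeholders, so ignoring it keeps lines like this hypothetical one acceptable:

# F541 would normally be reported here because the f-string interpolates nothing.
greeting = f"hello"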


@@ -6,7 +6,6 @@ import logging
from ._util import app, Arg, Opt, parse_config_overrides, show_validation_error
from ._util import import_code
from ..training.initialize import init_nlp
from .. import util
from ..util import get_sourced_components, load_model_from_config


@@ -1,11 +1,11 @@
from typing import Dict, Any, Optional, Iterable
from typing import Dict, Any, Optional
from pathlib import Path
import itertools
from spacy.training import Example
from spacy.util import resolve_dot_names
from wasabi import msg
from thinc.api import fix_random_seed, set_dropout_rate, Adam
from thinc.api import fix_random_seed, set_dropout_rate
from thinc.api import Model, data_validation, set_gpu_allocator
import typer
@@ -133,7 +133,6 @@ def debug_model(
_print_model(model, print_settings)
# STEP 2: Updating the model and printing again
optimizer = Adam(0.001)
set_dropout_rate(model, 0.2)
# ugly hack to deal with Tok2Vec/Transformer listeners
upstream_component = None
@@ -144,7 +143,6 @@ def debug_model(
and "transformer-listener" in model.get_ref("tok2vec").name
):
upstream_component = nlp.get_pipe("transformer")
goldY = None
for e in range(3):
if upstream_component:
upstream_component.update(examples)


@@ -331,7 +331,7 @@ def _format_label_scheme(data: Dict[str, Any]) -> str:
continue
col1 = md.bold(md.code(pipe))
col2 = ", ".join(
[md.code(label.replace("|", "\|")) for label in labels]
[md.code(label.replace("|", "\\|")) for label in labels]
) # noqa: W605
label_data.append((col1, col2))
n_labels += len(labels)
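
The replace() change fixes W605: inside a normal (non-raw) string, "\|" is an invalid escape sequence, while "\\|" spells a literal backslash followed by a pipe, which is what the Markdown table needs. A small hypothetical check:

# Escaping "|" so it can sit inside a Markdown table cell.
label = "A|B"
escaped = label.replace("|", "\\|")   # the pipe becomes a backslash plus the pipe
assert escaped == r"A\|B"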


@@ -5,7 +5,6 @@ import requests
from wasabi import msg, Printer
import warnings
from ..errors import Warnings
from ._util import app
from .. import about
from ..util import get_package_version, get_installed_models, get_minor_version


@@ -35,8 +35,8 @@ URL_PATTERN = (
# host & domain names
# mods: match is case-sensitive, so include [A-Z]
r"(?:" # noqa: E131
r"(?:"
r"[A-Za-z0-9\u00a1-\uffff]"
r"(?:" # noqa: E131
r"[A-Za-z0-9\u00a1-\uffff]" # noqa: E131
r"[A-Za-z0-9\u00a1-\uffff_-]{0,62}"
r")?"
r"[A-Za-z0-9\u00a1-\uffff]\."


@@ -693,7 +693,7 @@ class Language:
or self.vocab.vectors.to_bytes() != source.vocab.vectors.to_bytes()
):
warnings.warn(Warnings.W113.format(name=source_name))
if not source_name in source.component_names:
if source_name not in source.component_names:
raise KeyError(
Errors.E944.format(
name=source_name,


@@ -3,7 +3,6 @@ from typing import Optional, Union, List, Dict, Tuple, Iterable, Any, Callable,
from collections import defaultdict
from pathlib import Path
import srsly
import warnings
from .pipe import Pipe
from ..training import Example


@@ -381,9 +381,9 @@ def test_doc_api_from_docs(en_tokenizer, de_tokenizer):
en_docs_tokens = [t for doc in en_docs for t in doc]
assert len(m_doc) == len(en_docs_tokens)
think_idx = len(en_texts[0]) + 1 + en_texts[2].index("think")
assert m_doc[2]._.is_ambiguous == True
assert m_doc[2]._.is_ambiguous is True
assert m_doc[9].idx == think_idx
assert m_doc[9]._.is_ambiguous == True
assert m_doc[9]._.is_ambiguous is True
assert not any([t._.is_ambiguous for t in m_doc[3:8]])
assert "group" in m_doc.spans
assert span_group_texts == sorted([s.text for s in m_doc.spans["group"]])


@@ -484,7 +484,7 @@ def test_doc_retokenize_merge_without_parse_keeps_sents(en_tokenizer):
assert len(list(doc.sents)) == 2
with doc.retokenize() as retokenizer:
retokenizer.merge(doc[3:6])
assert doc[3].is_sent_start == None
assert doc[3].is_sent_start is None
# merging over a sentence boundary and setting sent_start
doc = Doc(tokens.vocab, words=[t.text for t in tokens], sent_starts=sent_starts)


@@ -1,5 +1,4 @@
import pytest
from spacy.lang.bg.lex_attrs import like_num
@pytest.mark.parametrize(


@@ -1,4 +1,3 @@
import pytest
from spacy.tokens import Doc


@@ -23,11 +23,11 @@ def test_vi_tokenizer_serialize(vi_tokenizer):
nlp_r = Vietnamese()
nlp_r.from_bytes(nlp_bytes)
assert nlp_bytes == nlp_r.to_bytes()
assert nlp_r.tokenizer.use_pyvi == False
assert nlp_r.tokenizer.use_pyvi is False
with make_tempdir() as d:
nlp.to_disk(d)
nlp_r = Vietnamese()
nlp_r.from_disk(d)
assert nlp_bytes == nlp_r.to_bytes()
assert nlp_r.tokenizer.use_pyvi == False
assert nlp_r.tokenizer.use_pyvi is False


@@ -354,7 +354,6 @@ def test_dependency_matcher_span_user_data(en_tokenizer):
for token in doc:
token.head = doc[0]
token.dep_ = "a"
get_is_c = lambda token: token.text in ("c",)
Token.set_extension("is_c", default=False)
doc[2]._.is_c = True
pattern = [


@@ -1,6 +1,5 @@
from typing import Callable, Iterable, Iterator
import pytest
import io
from thinc.api import Config
from spacy.language import Language


@@ -11,7 +11,7 @@ from spacy.ml import load_kb
from spacy.scorer import Scorer
from spacy.training import Example
from spacy.lang.en import English
from spacy.tests.util import make_tempdir, make_tempfile
from spacy.tests.util import make_tempdir
from spacy.tokens import Span


@@ -132,8 +132,8 @@ def test_incomplete_data():
# test the trained model
test_text = "I like blue eggs"
doc = nlp(test_text)
assert doc[1].tag_ is "V"
assert doc[2].tag_ is "J"
assert doc[1].tag_ == "V"
assert doc[2].tag_ == "J"
def test_overfitting_IO():
@@ -154,20 +154,20 @@ def test_overfitting_IO():
# test the trained model
test_text = "I like blue eggs"
doc = nlp(test_text)
assert doc[0].tag_ is "N"
assert doc[1].tag_ is "V"
assert doc[2].tag_ is "J"
assert doc[3].tag_ is "N"
assert doc[0].tag_ == "N"
assert doc[1].tag_ == "V"
assert doc[2].tag_ == "J"
assert doc[3].tag_ == "N"
# Also test the results are still the same after IO
with make_tempdir() as tmp_dir:
nlp.to_disk(tmp_dir)
nlp2 = util.load_model_from_path(tmp_dir)
doc2 = nlp2(test_text)
assert doc2[0].tag_ is "N"
assert doc2[1].tag_ is "V"
assert doc2[2].tag_ is "J"
assert doc2[3].tag_ is "N"
assert doc2[0].tag_ == "N"
assert doc2[1].tag_ == "V"
assert doc2[2].tag_ == "J"
assert doc2[3].tag_ == "N"
# Make sure that running pipe twice, or comparing to call, always amounts to the same predictions
texts = [
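
The tag assertions above worked by accident: CPython interns many short string literals, so doc[1].tag_ is "V" can evaluate to True even though it is an identity check, and Python 3.8+ warns about using is with a literal. Equality is the reliable comparison. A hypothetical illustration:

# Two equal strings are not necessarily the same object.
a = "NN"
b = "".join(["N", "N"])   # same value, built at runtime
assert a == b             # value equality always holds
print(a is b)             # identity may be False -- never a dependable check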


@@ -2,7 +2,6 @@ import pytest
from spacy import registry
from spacy.language import Language
from spacy.pipeline import EntityRuler
@pytest.fixture


@@ -8,7 +8,7 @@ from spacy.vocab import Vocab
from spacy.training import Example
from spacy.lang.en import English
from spacy.lang.de import German
from spacy.util import registry, ignore_error, raise_error, logger
from spacy.util import registry, ignore_error, raise_error
import spacy
from thinc.api import NumpyOps, get_current_ops


@@ -9,7 +9,7 @@ from spacy.ml._precomputable_affine import PrecomputableAffine
from spacy.ml._precomputable_affine import _backprop_precomputable_affine_padding
from spacy.util import dot_to_object, SimpleFrozenList, import_file
from spacy.util import to_ternary_int
from thinc.api import Config, Optimizer, ConfigValidationError, get_current_ops
from thinc.api import Config, Optimizer, ConfigValidationError
from thinc.api import set_current_ops
from spacy.training.batchers import minibatch_by_words
from spacy.lang.en import English


@@ -209,10 +209,6 @@ def test_tokenizer_flush_specials(en_vocab):
suffix_search=suffix_re.search,
rules=rules,
)
tokenizer2 = Tokenizer(
en_vocab,
suffix_search=suffix_re.search,
)
assert [t.text for t in tokenizer1("a a.")] == ["a a", "."]
tokenizer1.rules = {}
assert [t.text for t in tokenizer1("a a.")] == ["a", "a", "."]


@@ -110,7 +110,8 @@ def wandb_logger(
):
try:
import wandb
from wandb import init, log, join # test that these are available
# test that these are available
from wandb import init, log, join # noqa: F401
except ImportError:
raise ImportError(Errors.E880)
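
The import keeps its purpose as a capability check for the optional wandb dependency; the added noqa: F401 tells flake8 that the unused names are intentional. A minimal hypothetical version of the same pattern:

try:
    # Imported only to confirm the optional dependency provides these names.
    from wandb import init, log, join  # noqa: F401
except ImportError:
    raise ImportError("the wandb package is required for this logger")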


@@ -1,4 +1,4 @@
from typing import List, Callable, Tuple, Dict, Iterable, Iterator, Union, Any, IO
from typing import List, Callable, Tuple, Dict, Iterable, Union, Any, IO
from typing import Optional, TYPE_CHECKING
from pathlib import Path
from timeit import default_timer as timer