Tidy up with flake8: imports, comparisons, etc.

Adriane Boyd 2021-06-28 12:03:29 +02:00
parent 4d1ef8f695
commit 86d01e9229
23 changed files with 28 additions and 41 deletions

View File

@@ -111,7 +111,7 @@ universal = false
 formats = gztar
 [flake8]
-ignore = E203, E266, E501, E731, W503, E741
+ignore = E203, E266, E501, E731, W503, E741, F541
 max-line-length = 80
 select = B,C,E,F,W,T4,B9
 exclude =
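The new F541 entry is the pyflakes code for an f-string that contains no placeholders; adding it to ignore keeps flake8 quiet about such literals. A minimal illustration of what the check targets (hypothetical snippet, not part of this diff):

    greeting = f"hello"          # F541 would flag this: f-string without any placeholders
    greeting = "hello"           # plain string, nothing to report
    name = "world"
    greeting = f"hello {name}"   # placeholder present, nothing to report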

View File

@@ -6,7 +6,6 @@ import logging
 from ._util import app, Arg, Opt, parse_config_overrides, show_validation_error
 from ._util import import_code
-from ..training.initialize import init_nlp
 from .. import util
 from ..util import get_sourced_components, load_model_from_config

View File

@@ -1,11 +1,11 @@
-from typing import Dict, Any, Optional, Iterable
+from typing import Dict, Any, Optional
 from pathlib import Path
 import itertools
 from spacy.training import Example
 from spacy.util import resolve_dot_names
 from wasabi import msg
-from thinc.api import fix_random_seed, set_dropout_rate, Adam
+from thinc.api import fix_random_seed, set_dropout_rate
 from thinc.api import Model, data_validation, set_gpu_allocator
 import typer
@@ -133,7 +133,6 @@ def debug_model(
         _print_model(model, print_settings)
     # STEP 2: Updating the model and printing again
-    optimizer = Adam(0.001)
     set_dropout_rate(model, 0.2)
     # ugly hack to deal with Tok2Vec/Transformer listeners
     upstream_component = None
@@ -144,7 +143,6 @@ def debug_model(
         and "transformer-listener" in model.get_ref("tok2vec").name
     ):
         upstream_component = nlp.get_pipe("transformer")
-    goldY = None
    for e in range(3):
        if upstream_component:
            upstream_component.update(examples)

View File

@@ -331,7 +331,7 @@ def _format_label_scheme(data: Dict[str, Any]) -> str:
             continue
         col1 = md.bold(md.code(pipe))
         col2 = ", ".join(
-            [md.code(label.replace("|", "\|")) for label in labels]
+            [md.code(label.replace("|", "\\|")) for label in labels]
         )  # noqa: W605
         label_data.append((col1, col2))
         n_labels += len(labels)
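In a plain (non-raw) string literal, "\|" is an invalid escape sequence, which flake8 reports as W605 and newer Pythons warn about; doubling the backslash produces the same two-character replacement explicitly. A small check of the equivalence (illustrative only, not spaCy code):

    # "\\|" is a backslash followed by a pipe, exactly what the Markdown table needs
    assert "\\|" == chr(92) + "|"
    print("A|B".replace("|", "\\|"))   # prints: A\|B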

View File

@@ -5,7 +5,6 @@ import requests
 from wasabi import msg, Printer
 import warnings
-from ..errors import Warnings
 from ._util import app
 from .. import about
 from ..util import get_package_version, get_installed_models, get_minor_version

View File

@@ -35,8 +35,8 @@ URL_PATTERN = (
     # host & domain names
     # mods: match is case-sensitive, so include [A-Z]
     r"(?:"  # noqa: E131
-    r"(?:"
-    r"[A-Za-z0-9\u00a1-\uffff]"
+    r"(?:"  # noqa: E131
+    r"[A-Za-z0-9\u00a1-\uffff]"  # noqa: E131
     r"[A-Za-z0-9\u00a1-\uffff_-]{0,62}"
     r")?"
     r"[A-Za-z0-9\u00a1-\uffff]\."

View File

@@ -693,7 +693,7 @@ class Language:
                 or self.vocab.vectors.to_bytes() != source.vocab.vectors.to_bytes()
             ):
                 warnings.warn(Warnings.W113.format(name=source_name))
-            if not source_name in source.component_names:
+            if source_name not in source.component_names:
                 raise KeyError(
                     Errors.E944.format(
                         name=source_name,
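Rewriting "not source_name in ..." as "source_name not in ..." silences flake8's E713 ("test for membership should be 'not in x'"); the two forms evaluate identically, the second simply reads as a single operator. For example (illustrative only, not spaCy code):

    components = ["tagger", "parser"]
    assert (not "ner" in components) == ("ner" not in components)   # both True here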

View File

@@ -3,7 +3,6 @@ from typing import Optional, Union, List, Dict, Tuple, Iterable, Any, Callable,
 from collections import defaultdict
 from pathlib import Path
 import srsly
-import warnings
 from .pipe import Pipe
 from ..training import Example

View File

@@ -381,9 +381,9 @@ def test_doc_api_from_docs(en_tokenizer, de_tokenizer):
     en_docs_tokens = [t for doc in en_docs for t in doc]
     assert len(m_doc) == len(en_docs_tokens)
     think_idx = len(en_texts[0]) + 1 + en_texts[2].index("think")
-    assert m_doc[2]._.is_ambiguous == True
+    assert m_doc[2]._.is_ambiguous is True
     assert m_doc[9].idx == think_idx
-    assert m_doc[9]._.is_ambiguous == True
+    assert m_doc[9]._.is_ambiguous is True
     assert not any([t._.is_ambiguous for t in m_doc[3:8]])
     assert "group" in m_doc.spans
     assert span_group_texts == sorted([s.text for s in m_doc.spans["group"]])
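Comparisons like "== True" are what pycodestyle's E712 complains about; when the attribute really is a bool, "is True" (or simply using the value) is the idiomatic check, since True is a singleton. A sketch of the difference (illustrative only):

    flag = 3 > 2          # a genuine bool
    assert flag is True   # identity against the True singleton
    assert flag           # usually the simplest spelling in an assert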

View File

@@ -484,7 +484,7 @@ def test_doc_retokenize_merge_without_parse_keeps_sents(en_tokenizer):
     assert len(list(doc.sents)) == 2
     with doc.retokenize() as retokenizer:
         retokenizer.merge(doc[3:6])
-    assert doc[3].is_sent_start == None
+    assert doc[3].is_sent_start is None
     # merging over a sentence boundary and setting sent_start
     doc = Doc(tokens.vocab, words=[t.text for t in tokens], sent_starts=sent_starts)

View File

@@ -1,5 +1,4 @@
 import pytest
-from spacy.lang.bg.lex_attrs import like_num


 @pytest.mark.parametrize(

View File

@@ -1,4 +1,3 @@
-import pytest
 from spacy.tokens import Doc

View File

@@ -23,11 +23,11 @@ def test_vi_tokenizer_serialize(vi_tokenizer):
     nlp_r = Vietnamese()
     nlp_r.from_bytes(nlp_bytes)
     assert nlp_bytes == nlp_r.to_bytes()
-    assert nlp_r.tokenizer.use_pyvi == False
+    assert nlp_r.tokenizer.use_pyvi is False

     with make_tempdir() as d:
         nlp.to_disk(d)
         nlp_r = Vietnamese()
         nlp_r.from_disk(d)
         assert nlp_bytes == nlp_r.to_bytes()
-        assert nlp_r.tokenizer.use_pyvi == False
+        assert nlp_r.tokenizer.use_pyvi is False

View File

@@ -354,7 +354,6 @@ def test_dependency_matcher_span_user_data(en_tokenizer):
     for token in doc:
         token.head = doc[0]
         token.dep_ = "a"
-    get_is_c = lambda token: token.text in ("c",)
     Token.set_extension("is_c", default=False)
     doc[2]._.is_c = True
     pattern = [

View File

@@ -1,6 +1,5 @@
 from typing import Callable, Iterable, Iterator
 import pytest
-import io
 from thinc.api import Config
 from spacy.language import Language

View File

@@ -11,7 +11,7 @@ from spacy.ml import load_kb
 from spacy.scorer import Scorer
 from spacy.training import Example
 from spacy.lang.en import English
-from spacy.tests.util import make_tempdir, make_tempfile
+from spacy.tests.util import make_tempdir
 from spacy.tokens import Span

View File

@@ -132,8 +132,8 @@ def test_incomplete_data():
     # test the trained model
     test_text = "I like blue eggs"
     doc = nlp(test_text)
-    assert doc[1].tag_ is "V"
-    assert doc[2].tag_ is "J"
+    assert doc[1].tag_ == "V"
+    assert doc[2].tag_ == "J"


 def test_overfitting_IO():
@@ -154,20 +154,20 @@ def test_overfitting_IO():
     # test the trained model
     test_text = "I like blue eggs"
     doc = nlp(test_text)
-    assert doc[0].tag_ is "N"
-    assert doc[1].tag_ is "V"
-    assert doc[2].tag_ is "J"
-    assert doc[3].tag_ is "N"
+    assert doc[0].tag_ == "N"
+    assert doc[1].tag_ == "V"
+    assert doc[2].tag_ == "J"
+    assert doc[3].tag_ == "N"
     # Also test the results are still the same after IO
     with make_tempdir() as tmp_dir:
         nlp.to_disk(tmp_dir)
         nlp2 = util.load_model_from_path(tmp_dir)
         doc2 = nlp2(test_text)
-        assert doc2[0].tag_ is "N"
-        assert doc2[1].tag_ is "V"
-        assert doc2[2].tag_ is "J"
-        assert doc2[3].tag_ is "N"
+        assert doc2[0].tag_ == "N"
+        assert doc2[1].tag_ == "V"
+        assert doc2[2].tag_ == "J"
+        assert doc2[3].tag_ == "N"
     # Make sure that running pipe twice, or comparing to call, always amounts to the same predictions
     texts = [
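Writing tag_ is "V" compares object identity against a string literal, which only happens to pass when CPython interns the strings; flake8 reports it (F632) and recent CPython emits a SyntaxWarning for it. Value equality is what the test means, e.g. (illustrative only):

    tag = "".join(["V"])   # a "V" built at runtime, not necessarily the interned literal
    assert tag == "V"      # value comparison: always True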

View File

@@ -2,7 +2,6 @@ import pytest
 from spacy import registry
 from spacy.language import Language
-from spacy.pipeline import EntityRuler


 @pytest.fixture

View File

@@ -8,7 +8,7 @@ from spacy.vocab import Vocab
 from spacy.training import Example
 from spacy.lang.en import English
 from spacy.lang.de import German
-from spacy.util import registry, ignore_error, raise_error, logger
+from spacy.util import registry, ignore_error, raise_error
 import spacy
 from thinc.api import NumpyOps, get_current_ops

View File

@@ -9,7 +9,7 @@ from spacy.ml._precomputable_affine import PrecomputableAffine
 from spacy.ml._precomputable_affine import _backprop_precomputable_affine_padding
 from spacy.util import dot_to_object, SimpleFrozenList, import_file
 from spacy.util import to_ternary_int
-from thinc.api import Config, Optimizer, ConfigValidationError, get_current_ops
+from thinc.api import Config, Optimizer, ConfigValidationError
 from thinc.api import set_current_ops
 from spacy.training.batchers import minibatch_by_words
 from spacy.lang.en import English

View File

@@ -209,10 +209,6 @@ def test_tokenizer_flush_specials(en_vocab):
         suffix_search=suffix_re.search,
         rules=rules,
     )
-    tokenizer2 = Tokenizer(
-        en_vocab,
-        suffix_search=suffix_re.search,
-    )
     assert [t.text for t in tokenizer1("a a.")] == ["a a", "."]
     tokenizer1.rules = {}
     assert [t.text for t in tokenizer1("a a.")] == ["a", "a", "."]
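The second Tokenizer was assigned to tokenizer2 but never used afterwards, which flake8 reports as F841 (local variable assigned but never used); dropping it also avoids building an object the test never exercises. The pattern in isolation (illustrative only):

    def shorten(text):
        head = text[:10]   # used below, no warning
        tail = text[10:]   # assigned but never read: F841
        return head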

View File

@@ -110,7 +110,8 @@ def wandb_logger(
 ):
     try:
         import wandb
-        from wandb import init, log, join  # test that these are available
+        # test that these are available
+        from wandb import init, log, join  # noqa: F401
     except ImportError:
         raise ImportError(Errors.E880)
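Moving the comment onto its own line and adding # noqa: F401 keeps the import purely as an availability probe while telling flake8 not to report the names as unused. The same pattern works for any optional dependency check (the module name below is only an example):

    try:
        # probe that the optional package is importable
        import ujson  # noqa: F401
    except ImportError:
        ujson = None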

View File

@@ -1,4 +1,4 @@
-from typing import List, Callable, Tuple, Dict, Iterable, Iterator, Union, Any, IO
+from typing import List, Callable, Tuple, Dict, Iterable, Union, Any, IO
 from typing import Optional, TYPE_CHECKING
 from pathlib import Path
 from timeit import default_timer as timer