Tidy up with flake8: imports, comparisons, etc.

Adriane Boyd 2021-06-28 12:03:29 +02:00
parent 4d1ef8f695
commit 86d01e9229
23 changed files with 28 additions and 41 deletions
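
Most of the changes below follow a handful of recurring flake8 fixes: dropping unused imports, replacing equality comparisons against singletons with identity checks, rewriting negated membership tests, and comparing string literals with == rather than is. A rough, hypothetical sketch of the before/after patterns (illustrative only, not spaCy code):

# F401: imported but unused -- the fix is simply to delete the import.

# E712: comparisons to True/False should use "is" (or rely on truthiness).
flag = True
assert flag is True                  # instead of: assert flag == True

# E713: negated membership reads as "not in".
components = ["ner", "parser"]
assert "tagger" not in components    # instead of: assert not "tagger" in components

# F632: string literals are compared by value, not identity.
tag = "V"
assert tag == "V"                    # instead of: assert tag is "V"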


@@ -111,7 +111,7 @@ universal = false
formats = gztar
[flake8]
ignore = E203, E266, E501, E731, W503, E741
ignore = E203, E266, E501, E731, W503, E741, F541
max-line-length = 80
select = B,C,E,F,W,T4,B9
exclude =
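
The only configuration change is adding F541 to the ignore list; as far as I recall, F541 flags f-strings that contain no placeholders, so ignoring it keeps lines like this hypothetical one acceptable:

# F541 would normally be reported here because the f-string interpolates nothing.
greeting = f"hello"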


@@ -6,7 +6,6 @@ import logging
from ._util import app, Arg, Opt, parse_config_overrides, show_validation_error
from ._util import import_code
from ..training.initialize import init_nlp
from .. import util
from ..util import get_sourced_components, load_model_from_config


@@ -1,11 +1,11 @@
from typing import Dict, Any, Optional, Iterable
from typing import Dict, Any, Optional
from pathlib import Path
import itertools
from spacy.training import Example
from spacy.util import resolve_dot_names
from wasabi import msg
from thinc.api import fix_random_seed, set_dropout_rate, Adam
from thinc.api import fix_random_seed, set_dropout_rate
from thinc.api import Model, data_validation, set_gpu_allocator
import typer
@@ -133,7 +133,6 @@ def debug_model(
_print_model(model, print_settings)
# STEP 2: Updating the model and printing again
optimizer = Adam(0.001)
set_dropout_rate(model, 0.2)
# ugly hack to deal with Tok2Vec/Transformer listeners
upstream_component = None
@@ -144,7 +143,6 @@ def debug_model(
and "transformer-listener" in model.get_ref("tok2vec").name
):
upstream_component = nlp.get_pipe("transformer")
goldY = None
for e in range(3):
if upstream_component:
upstream_component.update(examples)


@@ -331,7 +331,7 @@ def _format_label_scheme(data: Dict[str, Any]) -> str:
continue
col1 = md.bold(md.code(pipe))
col2 = ", ".join(
[md.code(label.replace("|", "\|")) for label in labels]
[md.code(label.replace("|", "\\|")) for label in labels]
) # noqa: W605
label_data.append((col1, col2))
n_labels += len(labels)
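
The replace() change fixes W605: inside a normal (non-raw) string, "\|" is an invalid escape sequence, while "\\|" spells a literal backslash followed by a pipe, which is what the Markdown table needs. A small hypothetical check:

# Escaping "|" so it can sit inside a Markdown table cell.
label = "A|B"
escaped = label.replace("|", "\\|")   # the pipe becomes a backslash plus the pipe
assert escaped == r"A\|B"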


@@ -5,7 +5,6 @@ import requests
from wasabi import msg, Printer
import warnings
from ..errors import Warnings
from ._util import app
from .. import about
from ..util import get_package_version, get_installed_models, get_minor_version


@@ -35,8 +35,8 @@ URL_PATTERN = (
# host & domain names
# mods: match is case-sensitive, so include [A-Z]
r"(?:" # noqa: E131
r"(?:"
r"[A-Za-z0-9\u00a1-\uffff]"
r"(?:" # noqa: E131
r"[A-Za-z0-9\u00a1-\uffff]" # noqa: E131
r"[A-Za-z0-9\u00a1-\uffff_-]{0,62}"
r")?"
r"[A-Za-z0-9\u00a1-\uffff]\."


@@ -693,7 +693,7 @@ class Language:
or self.vocab.vectors.to_bytes() != source.vocab.vectors.to_bytes()
):
warnings.warn(Warnings.W113.format(name=source_name))
if not source_name in source.component_names:
if source_name not in source.component_names:
raise KeyError(
Errors.E944.format(
name=source_name,


@@ -3,7 +3,6 @@ from typing import Optional, Union, List, Dict, Tuple, Iterable, Any, Callable,
from collections import defaultdict
from pathlib import Path
import srsly
import warnings
from .pipe import Pipe
from ..training import Example


@@ -381,9 +381,9 @@ def test_doc_api_from_docs(en_tokenizer, de_tokenizer):
en_docs_tokens = [t for doc in en_docs for t in doc]
assert len(m_doc) == len(en_docs_tokens)
think_idx = len(en_texts[0]) + 1 + en_texts[2].index("think")
assert m_doc[2]._.is_ambiguous == True
assert m_doc[2]._.is_ambiguous is True
assert m_doc[9].idx == think_idx
assert m_doc[9]._.is_ambiguous == True
assert m_doc[9]._.is_ambiguous is True
assert not any([t._.is_ambiguous for t in m_doc[3:8]])
assert "group" in m_doc.spans
assert span_group_texts == sorted([s.text for s in m_doc.spans["group"]])


@@ -484,7 +484,7 @@ def test_doc_retokenize_merge_without_parse_keeps_sents(en_tokenizer):
assert len(list(doc.sents)) == 2
with doc.retokenize() as retokenizer:
retokenizer.merge(doc[3:6])
assert doc[3].is_sent_start == None
assert doc[3].is_sent_start is None
# merging over a sentence boundary and setting sent_start
doc = Doc(tokens.vocab, words=[t.text for t in tokens], sent_starts=sent_starts)


@@ -1,5 +1,4 @@
import pytest
from spacy.lang.bg.lex_attrs import like_num
@pytest.mark.parametrize(


@@ -1,4 +1,3 @@
import pytest
from spacy.tokens import Doc


@@ -23,11 +23,11 @@ def test_vi_tokenizer_serialize(vi_tokenizer):
nlp_r = Vietnamese()
nlp_r.from_bytes(nlp_bytes)
assert nlp_bytes == nlp_r.to_bytes()
assert nlp_r.tokenizer.use_pyvi == False
assert nlp_r.tokenizer.use_pyvi is False
with make_tempdir() as d:
nlp.to_disk(d)
nlp_r = Vietnamese()
nlp_r.from_disk(d)
assert nlp_bytes == nlp_r.to_bytes()
assert nlp_r.tokenizer.use_pyvi == False
assert nlp_r.tokenizer.use_pyvi is False


@@ -354,7 +354,6 @@ def test_dependency_matcher_span_user_data(en_tokenizer):
for token in doc:
token.head = doc[0]
token.dep_ = "a"
get_is_c = lambda token: token.text in ("c",)
Token.set_extension("is_c", default=False)
doc[2]._.is_c = True
pattern = [


@@ -1,6 +1,5 @@
from typing import Callable, Iterable, Iterator
import pytest
import io
from thinc.api import Config
from spacy.language import Language


@@ -11,7 +11,7 @@ from spacy.ml import load_kb
from spacy.scorer import Scorer
from spacy.training import Example
from spacy.lang.en import English
from spacy.tests.util import make_tempdir, make_tempfile
from spacy.tests.util import make_tempdir
from spacy.tokens import Span


@@ -132,8 +132,8 @@ def test_incomplete_data():
# test the trained model
test_text = "I like blue eggs"
doc = nlp(test_text)
assert doc[1].tag_ is "V"
assert doc[2].tag_ is "J"
assert doc[1].tag_ == "V"
assert doc[2].tag_ == "J"
def test_overfitting_IO():
@@ -154,20 +154,20 @@ def test_overfitting_IO():
# test the trained model
test_text = "I like blue eggs"
doc = nlp(test_text)
assert doc[0].tag_ is "N"
assert doc[1].tag_ is "V"
assert doc[2].tag_ is "J"
assert doc[3].tag_ is "N"
assert doc[0].tag_ == "N"
assert doc[1].tag_ == "V"
assert doc[2].tag_ == "J"
assert doc[3].tag_ == "N"
# Also test the results are still the same after IO
with make_tempdir() as tmp_dir:
nlp.to_disk(tmp_dir)
nlp2 = util.load_model_from_path(tmp_dir)
doc2 = nlp2(test_text)
assert doc2[0].tag_ is "N"
assert doc2[1].tag_ is "V"
assert doc2[2].tag_ is "J"
assert doc2[3].tag_ is "N"
assert doc2[0].tag_ == "N"
assert doc2[1].tag_ == "V"
assert doc2[2].tag_ == "J"
assert doc2[3].tag_ == "N"
# Make sure that running pipe twice, or comparing to call, always amounts to the same predictions
texts = [
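
The tag assertions above worked by accident: CPython interns many short string literals, so doc[1].tag_ is "V" can evaluate to True even though it is an identity check, and Python 3.8+ warns about using is with a literal. Equality is the reliable comparison. A hypothetical illustration:

# Two equal strings are not necessarily the same object.
a = "NN"
b = "".join(["N", "N"])   # same value, built at runtime
assert a == b             # value equality always holds
print(a is b)             # identity may be False -- never a dependable check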


@@ -2,7 +2,6 @@ import pytest
from spacy import registry
from spacy.language import Language
from spacy.pipeline import EntityRuler
@pytest.fixture


@@ -8,7 +8,7 @@ from spacy.vocab import Vocab
from spacy.training import Example
from spacy.lang.en import English
from spacy.lang.de import German
from spacy.util import registry, ignore_error, raise_error, logger
from spacy.util import registry, ignore_error, raise_error
import spacy
from thinc.api import NumpyOps, get_current_ops


@@ -9,7 +9,7 @@ from spacy.ml._precomputable_affine import PrecomputableAffine
from spacy.ml._precomputable_affine import _backprop_precomputable_affine_padding
from spacy.util import dot_to_object, SimpleFrozenList, import_file
from spacy.util import to_ternary_int
from thinc.api import Config, Optimizer, ConfigValidationError, get_current_ops
from thinc.api import Config, Optimizer, ConfigValidationError
from thinc.api import set_current_ops
from spacy.training.batchers import minibatch_by_words
from spacy.lang.en import English


@@ -209,10 +209,6 @@ def test_tokenizer_flush_specials(en_vocab):
suffix_search=suffix_re.search,
rules=rules,
)
tokenizer2 = Tokenizer(
en_vocab,
suffix_search=suffix_re.search,
)
assert [t.text for t in tokenizer1("a a.")] == ["a a", "."]
tokenizer1.rules = {}
assert [t.text for t in tokenizer1("a a.")] == ["a", "a", "."]


@@ -110,7 +110,8 @@ def wandb_logger(
):
try:
import wandb
from wandb import init, log, join # test that these are available
# test that these are available
from wandb import init, log, join # noqa: F401
except ImportError:
raise ImportError(Errors.E880)
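
The import keeps its purpose as a capability check for the optional wandb dependency; the added noqa: F401 tells flake8 that the unused names are intentional. A minimal hypothetical version of the same pattern:

try:
    # Imported only to confirm the optional dependency provides these names.
    from wandb import init, log, join  # noqa: F401
except ImportError:
    raise ImportError("the wandb package is required for this logger")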


@@ -1,4 +1,4 @@
from typing import List, Callable, Tuple, Dict, Iterable, Iterator, Union, Any, IO
from typing import List, Callable, Tuple, Dict, Iterable, Union, Any, IO
from typing import Optional, TYPE_CHECKING
from pathlib import Path
from timeit import default_timer as timer