Tidy up with flake8: imports, comparisons, etc.

Adriane Boyd 2021-06-28 12:03:29 +02:00
parent 4d1ef8f695
commit 86d01e9229
23 changed files with 28 additions and 41 deletions

View File

@@ -111,7 +111,7 @@ universal = false
 formats = gztar
 [flake8]
-ignore = E203, E266, E501, E731, W503, E741
+ignore = E203, E266, E501, E731, W503, E741, F541
 max-line-length = 80
 select = B,C,E,F,W,T4,B9
 exclude =
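The new F541 entry is the pyflakes code for an f-string that contains no placeholders; adding it to ignore keeps flake8 quiet about such literals. A minimal illustration of what the check targets (hypothetical snippet, not part of this diff):

    greeting = f"hello"          # F541 would flag this: f-string without any placeholders
    greeting = "hello"           # plain string, nothing to report
    name = "world"
    greeting = f"hello {name}"   # placeholder present, nothing to report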

View File

@@ -6,7 +6,6 @@ import logging
 from ._util import app, Arg, Opt, parse_config_overrides, show_validation_error
 from ._util import import_code
-from ..training.initialize import init_nlp
 from .. import util
 from ..util import get_sourced_components, load_model_from_config

View File

@@ -1,11 +1,11 @@
-from typing import Dict, Any, Optional, Iterable
+from typing import Dict, Any, Optional
 from pathlib import Path
 import itertools
 from spacy.training import Example
 from spacy.util import resolve_dot_names
 from wasabi import msg
-from thinc.api import fix_random_seed, set_dropout_rate, Adam
+from thinc.api import fix_random_seed, set_dropout_rate
 from thinc.api import Model, data_validation, set_gpu_allocator
 import typer
@@ -133,7 +133,6 @@ def debug_model(
         _print_model(model, print_settings)
     # STEP 2: Updating the model and printing again
-    optimizer = Adam(0.001)
     set_dropout_rate(model, 0.2)
     # ugly hack to deal with Tok2Vec/Transformer listeners
     upstream_component = None
@@ -144,7 +143,6 @@ def debug_model(
         and "transformer-listener" in model.get_ref("tok2vec").name
     ):
         upstream_component = nlp.get_pipe("transformer")
-    goldY = None
    for e in range(3):
        if upstream_component:
            upstream_component.update(examples)

View File

@@ -331,7 +331,7 @@ def _format_label_scheme(data: Dict[str, Any]) -> str:
             continue
         col1 = md.bold(md.code(pipe))
         col2 = ", ".join(
-            [md.code(label.replace("|", "\|")) for label in labels]
+            [md.code(label.replace("|", "\\|")) for label in labels]
         )  # noqa: W605
         label_data.append((col1, col2))
         n_labels += len(labels)
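In a plain (non-raw) string literal, "\|" is an invalid escape sequence, which flake8 reports as W605 and newer Pythons warn about; doubling the backslash produces the same two-character replacement explicitly. A small check of the equivalence (illustrative only, not spaCy code):

    # "\\|" is a backslash followed by a pipe, exactly what the Markdown table needs
    assert "\\|" == chr(92) + "|"
    print("A|B".replace("|", "\\|"))   # prints: A\|B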

View File

@@ -5,7 +5,6 @@ import requests
 from wasabi import msg, Printer
 import warnings
-from ..errors import Warnings
 from ._util import app
 from .. import about
 from ..util import get_package_version, get_installed_models, get_minor_version

View File

@@ -35,8 +35,8 @@ URL_PATTERN = (
     # host & domain names
     # mods: match is case-sensitive, so include [A-Z]
     r"(?:"  # noqa: E131
-    r"(?:"
-    r"[A-Za-z0-9\u00a1-\uffff]"
+    r"(?:"  # noqa: E131
+    r"[A-Za-z0-9\u00a1-\uffff]"  # noqa: E131
     r"[A-Za-z0-9\u00a1-\uffff_-]{0,62}"
     r")?"
     r"[A-Za-z0-9\u00a1-\uffff]\."

View File

@@ -693,7 +693,7 @@ class Language:
                 or self.vocab.vectors.to_bytes() != source.vocab.vectors.to_bytes()
             ):
                 warnings.warn(Warnings.W113.format(name=source_name))
-            if not source_name in source.component_names:
+            if source_name not in source.component_names:
                 raise KeyError(
                     Errors.E944.format(
                         name=source_name,
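Rewriting "not source_name in ..." as "source_name not in ..." silences flake8's E713 ("test for membership should be 'not in x'"); the two forms evaluate identically, the second simply reads as a single operator. For example (illustrative only, not spaCy code):

    components = ["tagger", "parser"]
    assert (not "ner" in components) == ("ner" not in components)   # both True here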

View File

@@ -3,7 +3,6 @@ from typing import Optional, Union, List, Dict, Tuple, Iterable, Any, Callable,
 from collections import defaultdict
 from pathlib import Path
 import srsly
-import warnings
 from .pipe import Pipe
 from ..training import Example

View File

@@ -381,9 +381,9 @@ def test_doc_api_from_docs(en_tokenizer, de_tokenizer):
     en_docs_tokens = [t for doc in en_docs for t in doc]
     assert len(m_doc) == len(en_docs_tokens)
     think_idx = len(en_texts[0]) + 1 + en_texts[2].index("think")
-    assert m_doc[2]._.is_ambiguous == True
+    assert m_doc[2]._.is_ambiguous is True
     assert m_doc[9].idx == think_idx
-    assert m_doc[9]._.is_ambiguous == True
+    assert m_doc[9]._.is_ambiguous is True
     assert not any([t._.is_ambiguous for t in m_doc[3:8]])
     assert "group" in m_doc.spans
     assert span_group_texts == sorted([s.text for s in m_doc.spans["group"]])
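Comparisons like "== True" are what pycodestyle's E712 complains about; when the attribute really is a bool, "is True" (or simply using the value) is the idiomatic check, since True is a singleton. A sketch of the difference (illustrative only):

    flag = 3 > 2          # a genuine bool
    assert flag is True   # identity against the True singleton
    assert flag           # usually the simplest spelling in an assert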

View File

@@ -484,7 +484,7 @@ def test_doc_retokenize_merge_without_parse_keeps_sents(en_tokenizer):
     assert len(list(doc.sents)) == 2
     with doc.retokenize() as retokenizer:
         retokenizer.merge(doc[3:6])
-    assert doc[3].is_sent_start == None
+    assert doc[3].is_sent_start is None
     # merging over a sentence boundary and setting sent_start
     doc = Doc(tokens.vocab, words=[t.text for t in tokens], sent_starts=sent_starts)

View File

@@ -1,5 +1,4 @@
 import pytest
-from spacy.lang.bg.lex_attrs import like_num


 @pytest.mark.parametrize(

View File

@@ -1,4 +1,3 @@
-import pytest
 from spacy.tokens import Doc

View File

@@ -23,11 +23,11 @@ def test_vi_tokenizer_serialize(vi_tokenizer):
     nlp_r = Vietnamese()
     nlp_r.from_bytes(nlp_bytes)
     assert nlp_bytes == nlp_r.to_bytes()
-    assert nlp_r.tokenizer.use_pyvi == False
+    assert nlp_r.tokenizer.use_pyvi is False

     with make_tempdir() as d:
         nlp.to_disk(d)
         nlp_r = Vietnamese()
         nlp_r.from_disk(d)
         assert nlp_bytes == nlp_r.to_bytes()
-        assert nlp_r.tokenizer.use_pyvi == False
+        assert nlp_r.tokenizer.use_pyvi is False

View File

@@ -354,7 +354,6 @@ def test_dependency_matcher_span_user_data(en_tokenizer):
     for token in doc:
         token.head = doc[0]
         token.dep_ = "a"
-    get_is_c = lambda token: token.text in ("c",)
     Token.set_extension("is_c", default=False)
     doc[2]._.is_c = True
     pattern = [

View File

@@ -1,6 +1,5 @@
 from typing import Callable, Iterable, Iterator
 import pytest
-import io
 from thinc.api import Config
 from spacy.language import Language

View File

@@ -11,7 +11,7 @@ from spacy.ml import load_kb
 from spacy.scorer import Scorer
 from spacy.training import Example
 from spacy.lang.en import English
-from spacy.tests.util import make_tempdir, make_tempfile
+from spacy.tests.util import make_tempdir
 from spacy.tokens import Span

View File

@@ -132,8 +132,8 @@ def test_incomplete_data():
     # test the trained model
     test_text = "I like blue eggs"
     doc = nlp(test_text)
-    assert doc[1].tag_ is "V"
-    assert doc[2].tag_ is "J"
+    assert doc[1].tag_ == "V"
+    assert doc[2].tag_ == "J"


 def test_overfitting_IO():
@@ -154,20 +154,20 @@ def test_overfitting_IO():
     # test the trained model
     test_text = "I like blue eggs"
     doc = nlp(test_text)
-    assert doc[0].tag_ is "N"
-    assert doc[1].tag_ is "V"
-    assert doc[2].tag_ is "J"
-    assert doc[3].tag_ is "N"
+    assert doc[0].tag_ == "N"
+    assert doc[1].tag_ == "V"
+    assert doc[2].tag_ == "J"
+    assert doc[3].tag_ == "N"
     # Also test the results are still the same after IO
     with make_tempdir() as tmp_dir:
         nlp.to_disk(tmp_dir)
         nlp2 = util.load_model_from_path(tmp_dir)
         doc2 = nlp2(test_text)
-        assert doc2[0].tag_ is "N"
-        assert doc2[1].tag_ is "V"
-        assert doc2[2].tag_ is "J"
-        assert doc2[3].tag_ is "N"
+        assert doc2[0].tag_ == "N"
+        assert doc2[1].tag_ == "V"
+        assert doc2[2].tag_ == "J"
+        assert doc2[3].tag_ == "N"
     # Make sure that running pipe twice, or comparing to call, always amounts to the same predictions
     texts = [
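Writing tag_ is "V" compares object identity against a string literal, which only happens to pass when CPython interns the strings; flake8 reports it (F632) and recent CPython emits a SyntaxWarning for it. Value equality is what the test means, e.g. (illustrative only):

    tag = "".join(["V"])   # a "V" built at runtime, not necessarily the interned literal
    assert tag == "V"      # value comparison: always True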

View File

@@ -2,7 +2,6 @@ import pytest
 from spacy import registry
 from spacy.language import Language
-from spacy.pipeline import EntityRuler


 @pytest.fixture

View File

@@ -8,7 +8,7 @@ from spacy.vocab import Vocab
 from spacy.training import Example
 from spacy.lang.en import English
 from spacy.lang.de import German
-from spacy.util import registry, ignore_error, raise_error, logger
+from spacy.util import registry, ignore_error, raise_error
 import spacy
 from thinc.api import NumpyOps, get_current_ops

View File

@@ -9,7 +9,7 @@ from spacy.ml._precomputable_affine import PrecomputableAffine
 from spacy.ml._precomputable_affine import _backprop_precomputable_affine_padding
 from spacy.util import dot_to_object, SimpleFrozenList, import_file
 from spacy.util import to_ternary_int
-from thinc.api import Config, Optimizer, ConfigValidationError, get_current_ops
+from thinc.api import Config, Optimizer, ConfigValidationError
 from thinc.api import set_current_ops
 from spacy.training.batchers import minibatch_by_words
 from spacy.lang.en import English

View File

@@ -209,10 +209,6 @@ def test_tokenizer_flush_specials(en_vocab):
         suffix_search=suffix_re.search,
         rules=rules,
     )
-    tokenizer2 = Tokenizer(
-        en_vocab,
-        suffix_search=suffix_re.search,
-    )
     assert [t.text for t in tokenizer1("a a.")] == ["a a", "."]
     tokenizer1.rules = {}
     assert [t.text for t in tokenizer1("a a.")] == ["a", "a", "."]
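The second Tokenizer was assigned to tokenizer2 but never used afterwards, which flake8 reports as F841 (local variable assigned but never used); dropping it also avoids building an object the test never exercises. The pattern in isolation (illustrative only):

    def shorten(text):
        head = text[:10]   # used below, no warning
        tail = text[10:]   # assigned but never read: F841
        return head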

View File

@@ -110,7 +110,8 @@ def wandb_logger(
 ):
     try:
         import wandb
-        from wandb import init, log, join  # test that these are available
+        # test that these are available
+        from wandb import init, log, join  # noqa: F401
     except ImportError:
         raise ImportError(Errors.E880)
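Moving the comment onto its own line and adding # noqa: F401 keeps the import purely as an availability probe while telling flake8 not to report the names as unused. The same pattern works for any optional dependency check (the module name below is only an example):

    try:
        # probe that the optional package is importable
        import ujson  # noqa: F401
    except ImportError:
        ujson = None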

View File

@@ -1,4 +1,4 @@
-from typing import List, Callable, Tuple, Dict, Iterable, Iterator, Union, Any, IO
+from typing import List, Callable, Tuple, Dict, Iterable, Union, Any, IO
 from typing import Optional, TYPE_CHECKING
 from pathlib import Path
 from timeit import default_timer as timer