mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 17:24:41 +03:00
Tidy up with flake8: imports, comparisons, etc.
This commit is contained in:
parent
4d1ef8f695
commit
86d01e9229
|
@ -111,7 +111,7 @@ universal = false
|
|||
formats = gztar
|
||||
|
||||
[flake8]
|
||||
ignore = E203, E266, E501, E731, W503, E741
|
||||
ignore = E203, E266, E501, E731, W503, E741, F541
|
||||
max-line-length = 80
|
||||
select = B,C,E,F,W,T4,B9
|
||||
exclude =
|
||||
|
|
|
@ -6,7 +6,6 @@ import logging
|
|||
|
||||
from ._util import app, Arg, Opt, parse_config_overrides, show_validation_error
|
||||
from ._util import import_code
|
||||
from ..training.initialize import init_nlp
|
||||
from .. import util
|
||||
from ..util import get_sourced_components, load_model_from_config
|
||||
|
||||
|
|
|
@ -1,11 +1,11 @@
|
|||
from typing import Dict, Any, Optional, Iterable
|
||||
from typing import Dict, Any, Optional
|
||||
from pathlib import Path
|
||||
import itertools
|
||||
|
||||
from spacy.training import Example
|
||||
from spacy.util import resolve_dot_names
|
||||
from wasabi import msg
|
||||
from thinc.api import fix_random_seed, set_dropout_rate, Adam
|
||||
from thinc.api import fix_random_seed, set_dropout_rate
|
||||
from thinc.api import Model, data_validation, set_gpu_allocator
|
||||
import typer
|
||||
|
||||
|
@ -133,7 +133,6 @@ def debug_model(
|
|||
_print_model(model, print_settings)
|
||||
|
||||
# STEP 2: Updating the model and printing again
|
||||
optimizer = Adam(0.001)
|
||||
set_dropout_rate(model, 0.2)
|
||||
# ugly hack to deal with Tok2Vec/Transformer listeners
|
||||
upstream_component = None
|
||||
|
@ -144,7 +143,6 @@ def debug_model(
|
|||
and "transformer-listener" in model.get_ref("tok2vec").name
|
||||
):
|
||||
upstream_component = nlp.get_pipe("transformer")
|
||||
goldY = None
|
||||
for e in range(3):
|
||||
if upstream_component:
|
||||
upstream_component.update(examples)
|
||||
|
|
|
@ -331,7 +331,7 @@ def _format_label_scheme(data: Dict[str, Any]) -> str:
|
|||
continue
|
||||
col1 = md.bold(md.code(pipe))
|
||||
col2 = ", ".join(
|
||||
[md.code(label.replace("|", "\|")) for label in labels]
|
||||
[md.code(label.replace("|", "\\|")) for label in labels]
|
||||
) # noqa: W605
|
||||
label_data.append((col1, col2))
|
||||
n_labels += len(labels)
|
||||
|
|
|
@ -5,7 +5,6 @@ import requests
|
|||
from wasabi import msg, Printer
|
||||
import warnings
|
||||
|
||||
from ..errors import Warnings
|
||||
from ._util import app
|
||||
from .. import about
|
||||
from ..util import get_package_version, get_installed_models, get_minor_version
|
||||
|
|
|
@ -35,8 +35,8 @@ URL_PATTERN = (
|
|||
# host & domain names
|
||||
# mods: match is case-sensitive, so include [A-Z]
|
||||
r"(?:" # noqa: E131
|
||||
r"(?:"
|
||||
r"[A-Za-z0-9\u00a1-\uffff]"
|
||||
r"(?:" # noqa: E131
|
||||
r"[A-Za-z0-9\u00a1-\uffff]" # noqa: E131
|
||||
r"[A-Za-z0-9\u00a1-\uffff_-]{0,62}"
|
||||
r")?"
|
||||
r"[A-Za-z0-9\u00a1-\uffff]\."
|
||||
|
|
|
@ -693,7 +693,7 @@ class Language:
|
|||
or self.vocab.vectors.to_bytes() != source.vocab.vectors.to_bytes()
|
||||
):
|
||||
warnings.warn(Warnings.W113.format(name=source_name))
|
||||
if not source_name in source.component_names:
|
||||
if source_name not in source.component_names:
|
||||
raise KeyError(
|
||||
Errors.E944.format(
|
||||
name=source_name,
|
||||
|
|
|
@ -3,7 +3,6 @@ from typing import Optional, Union, List, Dict, Tuple, Iterable, Any, Callable,
|
|||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
import srsly
|
||||
import warnings
|
||||
|
||||
from .pipe import Pipe
|
||||
from ..training import Example
|
||||
|
|
|
@ -381,9 +381,9 @@ def test_doc_api_from_docs(en_tokenizer, de_tokenizer):
|
|||
en_docs_tokens = [t for doc in en_docs for t in doc]
|
||||
assert len(m_doc) == len(en_docs_tokens)
|
||||
think_idx = len(en_texts[0]) + 1 + en_texts[2].index("think")
|
||||
assert m_doc[2]._.is_ambiguous == True
|
||||
assert m_doc[2]._.is_ambiguous is True
|
||||
assert m_doc[9].idx == think_idx
|
||||
assert m_doc[9]._.is_ambiguous == True
|
||||
assert m_doc[9]._.is_ambiguous is True
|
||||
assert not any([t._.is_ambiguous for t in m_doc[3:8]])
|
||||
assert "group" in m_doc.spans
|
||||
assert span_group_texts == sorted([s.text for s in m_doc.spans["group"]])
|
||||
|
|
|
@ -484,7 +484,7 @@ def test_doc_retokenize_merge_without_parse_keeps_sents(en_tokenizer):
|
|||
assert len(list(doc.sents)) == 2
|
||||
with doc.retokenize() as retokenizer:
|
||||
retokenizer.merge(doc[3:6])
|
||||
assert doc[3].is_sent_start == None
|
||||
assert doc[3].is_sent_start is None
|
||||
|
||||
# merging over a sentence boundary and setting sent_start
|
||||
doc = Doc(tokens.vocab, words=[t.text for t in tokens], sent_starts=sent_starts)
|
||||
|
|
|
@ -1,5 +1,4 @@
|
|||
import pytest
|
||||
from spacy.lang.bg.lex_attrs import like_num
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
|
|
|
@ -1,4 +1,3 @@
|
|||
import pytest
|
||||
from spacy.tokens import Doc
|
||||
|
||||
|
||||
|
|
|
@ -23,11 +23,11 @@ def test_vi_tokenizer_serialize(vi_tokenizer):
|
|||
nlp_r = Vietnamese()
|
||||
nlp_r.from_bytes(nlp_bytes)
|
||||
assert nlp_bytes == nlp_r.to_bytes()
|
||||
assert nlp_r.tokenizer.use_pyvi == False
|
||||
assert nlp_r.tokenizer.use_pyvi is False
|
||||
|
||||
with make_tempdir() as d:
|
||||
nlp.to_disk(d)
|
||||
nlp_r = Vietnamese()
|
||||
nlp_r.from_disk(d)
|
||||
assert nlp_bytes == nlp_r.to_bytes()
|
||||
assert nlp_r.tokenizer.use_pyvi == False
|
||||
assert nlp_r.tokenizer.use_pyvi is False
|
||||
|
|
|
@ -354,7 +354,6 @@ def test_dependency_matcher_span_user_data(en_tokenizer):
|
|||
for token in doc:
|
||||
token.head = doc[0]
|
||||
token.dep_ = "a"
|
||||
get_is_c = lambda token: token.text in ("c",)
|
||||
Token.set_extension("is_c", default=False)
|
||||
doc[2]._.is_c = True
|
||||
pattern = [
|
||||
|
|
|
@ -1,6 +1,5 @@
|
|||
from typing import Callable, Iterable, Iterator
|
||||
import pytest
|
||||
import io
|
||||
|
||||
from thinc.api import Config
|
||||
from spacy.language import Language
|
||||
|
|
|
@ -11,7 +11,7 @@ from spacy.ml import load_kb
|
|||
from spacy.scorer import Scorer
|
||||
from spacy.training import Example
|
||||
from spacy.lang.en import English
|
||||
from spacy.tests.util import make_tempdir, make_tempfile
|
||||
from spacy.tests.util import make_tempdir
|
||||
from spacy.tokens import Span
|
||||
|
||||
|
||||
|
|
|
@ -132,8 +132,8 @@ def test_incomplete_data():
|
|||
# test the trained model
|
||||
test_text = "I like blue eggs"
|
||||
doc = nlp(test_text)
|
||||
assert doc[1].tag_ is "V"
|
||||
assert doc[2].tag_ is "J"
|
||||
assert doc[1].tag_ == "V"
|
||||
assert doc[2].tag_ == "J"
|
||||
|
||||
|
||||
def test_overfitting_IO():
|
||||
|
@ -154,20 +154,20 @@ def test_overfitting_IO():
|
|||
# test the trained model
|
||||
test_text = "I like blue eggs"
|
||||
doc = nlp(test_text)
|
||||
assert doc[0].tag_ is "N"
|
||||
assert doc[1].tag_ is "V"
|
||||
assert doc[2].tag_ is "J"
|
||||
assert doc[3].tag_ is "N"
|
||||
assert doc[0].tag_ == "N"
|
||||
assert doc[1].tag_ == "V"
|
||||
assert doc[2].tag_ == "J"
|
||||
assert doc[3].tag_ == "N"
|
||||
|
||||
# Also test the results are still the same after IO
|
||||
with make_tempdir() as tmp_dir:
|
||||
nlp.to_disk(tmp_dir)
|
||||
nlp2 = util.load_model_from_path(tmp_dir)
|
||||
doc2 = nlp2(test_text)
|
||||
assert doc2[0].tag_ is "N"
|
||||
assert doc2[1].tag_ is "V"
|
||||
assert doc2[2].tag_ is "J"
|
||||
assert doc2[3].tag_ is "N"
|
||||
assert doc2[0].tag_ == "N"
|
||||
assert doc2[1].tag_ == "V"
|
||||
assert doc2[2].tag_ == "J"
|
||||
assert doc2[3].tag_ == "N"
|
||||
|
||||
# Make sure that running pipe twice, or comparing to call, always amounts to the same predictions
|
||||
texts = [
|
||||
|
|
|
@ -2,7 +2,6 @@ import pytest
|
|||
|
||||
from spacy import registry
|
||||
from spacy.language import Language
|
||||
from spacy.pipeline import EntityRuler
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
|
|
|
@ -8,7 +8,7 @@ from spacy.vocab import Vocab
|
|||
from spacy.training import Example
|
||||
from spacy.lang.en import English
|
||||
from spacy.lang.de import German
|
||||
from spacy.util import registry, ignore_error, raise_error, logger
|
||||
from spacy.util import registry, ignore_error, raise_error
|
||||
import spacy
|
||||
from thinc.api import NumpyOps, get_current_ops
|
||||
|
||||
|
|
|
@ -9,7 +9,7 @@ from spacy.ml._precomputable_affine import PrecomputableAffine
|
|||
from spacy.ml._precomputable_affine import _backprop_precomputable_affine_padding
|
||||
from spacy.util import dot_to_object, SimpleFrozenList, import_file
|
||||
from spacy.util import to_ternary_int
|
||||
from thinc.api import Config, Optimizer, ConfigValidationError, get_current_ops
|
||||
from thinc.api import Config, Optimizer, ConfigValidationError
|
||||
from thinc.api import set_current_ops
|
||||
from spacy.training.batchers import minibatch_by_words
|
||||
from spacy.lang.en import English
|
||||
|
|
|
@ -209,10 +209,6 @@ def test_tokenizer_flush_specials(en_vocab):
|
|||
suffix_search=suffix_re.search,
|
||||
rules=rules,
|
||||
)
|
||||
tokenizer2 = Tokenizer(
|
||||
en_vocab,
|
||||
suffix_search=suffix_re.search,
|
||||
)
|
||||
assert [t.text for t in tokenizer1("a a.")] == ["a a", "."]
|
||||
tokenizer1.rules = {}
|
||||
assert [t.text for t in tokenizer1("a a.")] == ["a", "a", "."]
|
||||
|
|
|
@ -110,7 +110,8 @@ def wandb_logger(
|
|||
):
|
||||
try:
|
||||
import wandb
|
||||
from wandb import init, log, join # test that these are available
|
||||
# test that these are available
|
||||
from wandb import init, log, join # noqa: F401
|
||||
except ImportError:
|
||||
raise ImportError(Errors.E880)
|
||||
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
from typing import List, Callable, Tuple, Dict, Iterable, Iterator, Union, Any, IO
|
||||
from typing import List, Callable, Tuple, Dict, Iterable, Union, Any, IO
|
||||
from typing import Optional, TYPE_CHECKING
|
||||
from pathlib import Path
|
||||
from timeit import default_timer as timer
|
||||
|
|
Loading…
Reference in New Issue
Block a user