mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 10:16:27 +03:00
Tidy up with flake8: imports, comparisons, etc.
This commit is contained in:
parent
4d1ef8f695
commit
86d01e9229
|
@ -111,7 +111,7 @@ universal = false
|
||||||
formats = gztar
|
formats = gztar
|
||||||
|
|
||||||
[flake8]
|
[flake8]
|
||||||
ignore = E203, E266, E501, E731, W503, E741
|
ignore = E203, E266, E501, E731, W503, E741, F541
|
||||||
max-line-length = 80
|
max-line-length = 80
|
||||||
select = B,C,E,F,W,T4,B9
|
select = B,C,E,F,W,T4,B9
|
||||||
exclude =
|
exclude =
|
||||||
|
|
|
@ -6,7 +6,6 @@ import logging
|
||||||
|
|
||||||
from ._util import app, Arg, Opt, parse_config_overrides, show_validation_error
|
from ._util import app, Arg, Opt, parse_config_overrides, show_validation_error
|
||||||
from ._util import import_code
|
from ._util import import_code
|
||||||
from ..training.initialize import init_nlp
|
|
||||||
from .. import util
|
from .. import util
|
||||||
from ..util import get_sourced_components, load_model_from_config
|
from ..util import get_sourced_components, load_model_from_config
|
||||||
|
|
||||||
|
|
|
@ -1,11 +1,11 @@
|
||||||
from typing import Dict, Any, Optional, Iterable
|
from typing import Dict, Any, Optional
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import itertools
|
import itertools
|
||||||
|
|
||||||
from spacy.training import Example
|
from spacy.training import Example
|
||||||
from spacy.util import resolve_dot_names
|
from spacy.util import resolve_dot_names
|
||||||
from wasabi import msg
|
from wasabi import msg
|
||||||
from thinc.api import fix_random_seed, set_dropout_rate, Adam
|
from thinc.api import fix_random_seed, set_dropout_rate
|
||||||
from thinc.api import Model, data_validation, set_gpu_allocator
|
from thinc.api import Model, data_validation, set_gpu_allocator
|
||||||
import typer
|
import typer
|
||||||
|
|
||||||
|
@ -133,7 +133,6 @@ def debug_model(
|
||||||
_print_model(model, print_settings)
|
_print_model(model, print_settings)
|
||||||
|
|
||||||
# STEP 2: Updating the model and printing again
|
# STEP 2: Updating the model and printing again
|
||||||
optimizer = Adam(0.001)
|
|
||||||
set_dropout_rate(model, 0.2)
|
set_dropout_rate(model, 0.2)
|
||||||
# ugly hack to deal with Tok2Vec/Transformer listeners
|
# ugly hack to deal with Tok2Vec/Transformer listeners
|
||||||
upstream_component = None
|
upstream_component = None
|
||||||
|
@ -144,7 +143,6 @@ def debug_model(
|
||||||
and "transformer-listener" in model.get_ref("tok2vec").name
|
and "transformer-listener" in model.get_ref("tok2vec").name
|
||||||
):
|
):
|
||||||
upstream_component = nlp.get_pipe("transformer")
|
upstream_component = nlp.get_pipe("transformer")
|
||||||
goldY = None
|
|
||||||
for e in range(3):
|
for e in range(3):
|
||||||
if upstream_component:
|
if upstream_component:
|
||||||
upstream_component.update(examples)
|
upstream_component.update(examples)
|
||||||
|
|
|
@ -331,7 +331,7 @@ def _format_label_scheme(data: Dict[str, Any]) -> str:
|
||||||
continue
|
continue
|
||||||
col1 = md.bold(md.code(pipe))
|
col1 = md.bold(md.code(pipe))
|
||||||
col2 = ", ".join(
|
col2 = ", ".join(
|
||||||
[md.code(label.replace("|", "\|")) for label in labels]
|
[md.code(label.replace("|", "\\|")) for label in labels]
|
||||||
) # noqa: W605
|
) # noqa: W605
|
||||||
label_data.append((col1, col2))
|
label_data.append((col1, col2))
|
||||||
n_labels += len(labels)
|
n_labels += len(labels)
|
||||||
|
|
|
@ -5,7 +5,6 @@ import requests
|
||||||
from wasabi import msg, Printer
|
from wasabi import msg, Printer
|
||||||
import warnings
|
import warnings
|
||||||
|
|
||||||
from ..errors import Warnings
|
|
||||||
from ._util import app
|
from ._util import app
|
||||||
from .. import about
|
from .. import about
|
||||||
from ..util import get_package_version, get_installed_models, get_minor_version
|
from ..util import get_package_version, get_installed_models, get_minor_version
|
||||||
|
|
|
@ -35,8 +35,8 @@ URL_PATTERN = (
|
||||||
# host & domain names
|
# host & domain names
|
||||||
# mods: match is case-sensitive, so include [A-Z]
|
# mods: match is case-sensitive, so include [A-Z]
|
||||||
r"(?:" # noqa: E131
|
r"(?:" # noqa: E131
|
||||||
r"(?:"
|
r"(?:" # noqa: E131
|
||||||
r"[A-Za-z0-9\u00a1-\uffff]"
|
r"[A-Za-z0-9\u00a1-\uffff]" # noqa: E131
|
||||||
r"[A-Za-z0-9\u00a1-\uffff_-]{0,62}"
|
r"[A-Za-z0-9\u00a1-\uffff_-]{0,62}"
|
||||||
r")?"
|
r")?"
|
||||||
r"[A-Za-z0-9\u00a1-\uffff]\."
|
r"[A-Za-z0-9\u00a1-\uffff]\."
|
||||||
|
|
|
@ -693,7 +693,7 @@ class Language:
|
||||||
or self.vocab.vectors.to_bytes() != source.vocab.vectors.to_bytes()
|
or self.vocab.vectors.to_bytes() != source.vocab.vectors.to_bytes()
|
||||||
):
|
):
|
||||||
warnings.warn(Warnings.W113.format(name=source_name))
|
warnings.warn(Warnings.W113.format(name=source_name))
|
||||||
if not source_name in source.component_names:
|
if source_name not in source.component_names:
|
||||||
raise KeyError(
|
raise KeyError(
|
||||||
Errors.E944.format(
|
Errors.E944.format(
|
||||||
name=source_name,
|
name=source_name,
|
||||||
|
|
|
@ -3,7 +3,6 @@ from typing import Optional, Union, List, Dict, Tuple, Iterable, Any, Callable,
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import srsly
|
import srsly
|
||||||
import warnings
|
|
||||||
|
|
||||||
from .pipe import Pipe
|
from .pipe import Pipe
|
||||||
from ..training import Example
|
from ..training import Example
|
||||||
|
|
|
@ -381,9 +381,9 @@ def test_doc_api_from_docs(en_tokenizer, de_tokenizer):
|
||||||
en_docs_tokens = [t for doc in en_docs for t in doc]
|
en_docs_tokens = [t for doc in en_docs for t in doc]
|
||||||
assert len(m_doc) == len(en_docs_tokens)
|
assert len(m_doc) == len(en_docs_tokens)
|
||||||
think_idx = len(en_texts[0]) + 1 + en_texts[2].index("think")
|
think_idx = len(en_texts[0]) + 1 + en_texts[2].index("think")
|
||||||
assert m_doc[2]._.is_ambiguous == True
|
assert m_doc[2]._.is_ambiguous is True
|
||||||
assert m_doc[9].idx == think_idx
|
assert m_doc[9].idx == think_idx
|
||||||
assert m_doc[9]._.is_ambiguous == True
|
assert m_doc[9]._.is_ambiguous is True
|
||||||
assert not any([t._.is_ambiguous for t in m_doc[3:8]])
|
assert not any([t._.is_ambiguous for t in m_doc[3:8]])
|
||||||
assert "group" in m_doc.spans
|
assert "group" in m_doc.spans
|
||||||
assert span_group_texts == sorted([s.text for s in m_doc.spans["group"]])
|
assert span_group_texts == sorted([s.text for s in m_doc.spans["group"]])
|
||||||
|
|
|
@ -484,7 +484,7 @@ def test_doc_retokenize_merge_without_parse_keeps_sents(en_tokenizer):
|
||||||
assert len(list(doc.sents)) == 2
|
assert len(list(doc.sents)) == 2
|
||||||
with doc.retokenize() as retokenizer:
|
with doc.retokenize() as retokenizer:
|
||||||
retokenizer.merge(doc[3:6])
|
retokenizer.merge(doc[3:6])
|
||||||
assert doc[3].is_sent_start == None
|
assert doc[3].is_sent_start is None
|
||||||
|
|
||||||
# merging over a sentence boundary and setting sent_start
|
# merging over a sentence boundary and setting sent_start
|
||||||
doc = Doc(tokens.vocab, words=[t.text for t in tokens], sent_starts=sent_starts)
|
doc = Doc(tokens.vocab, words=[t.text for t in tokens], sent_starts=sent_starts)
|
||||||
|
|
|
@ -1,5 +1,4 @@
|
||||||
import pytest
|
import pytest
|
||||||
from spacy.lang.bg.lex_attrs import like_num
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
@pytest.mark.parametrize(
|
||||||
|
|
|
@ -1,4 +1,3 @@
|
||||||
import pytest
|
|
||||||
from spacy.tokens import Doc
|
from spacy.tokens import Doc
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -23,11 +23,11 @@ def test_vi_tokenizer_serialize(vi_tokenizer):
|
||||||
nlp_r = Vietnamese()
|
nlp_r = Vietnamese()
|
||||||
nlp_r.from_bytes(nlp_bytes)
|
nlp_r.from_bytes(nlp_bytes)
|
||||||
assert nlp_bytes == nlp_r.to_bytes()
|
assert nlp_bytes == nlp_r.to_bytes()
|
||||||
assert nlp_r.tokenizer.use_pyvi == False
|
assert nlp_r.tokenizer.use_pyvi is False
|
||||||
|
|
||||||
with make_tempdir() as d:
|
with make_tempdir() as d:
|
||||||
nlp.to_disk(d)
|
nlp.to_disk(d)
|
||||||
nlp_r = Vietnamese()
|
nlp_r = Vietnamese()
|
||||||
nlp_r.from_disk(d)
|
nlp_r.from_disk(d)
|
||||||
assert nlp_bytes == nlp_r.to_bytes()
|
assert nlp_bytes == nlp_r.to_bytes()
|
||||||
assert nlp_r.tokenizer.use_pyvi == False
|
assert nlp_r.tokenizer.use_pyvi is False
|
||||||
|
|
|
@ -354,7 +354,6 @@ def test_dependency_matcher_span_user_data(en_tokenizer):
|
||||||
for token in doc:
|
for token in doc:
|
||||||
token.head = doc[0]
|
token.head = doc[0]
|
||||||
token.dep_ = "a"
|
token.dep_ = "a"
|
||||||
get_is_c = lambda token: token.text in ("c",)
|
|
||||||
Token.set_extension("is_c", default=False)
|
Token.set_extension("is_c", default=False)
|
||||||
doc[2]._.is_c = True
|
doc[2]._.is_c = True
|
||||||
pattern = [
|
pattern = [
|
||||||
|
|
|
@ -1,6 +1,5 @@
|
||||||
from typing import Callable, Iterable, Iterator
|
from typing import Callable, Iterable, Iterator
|
||||||
import pytest
|
import pytest
|
||||||
import io
|
|
||||||
|
|
||||||
from thinc.api import Config
|
from thinc.api import Config
|
||||||
from spacy.language import Language
|
from spacy.language import Language
|
||||||
|
|
|
@ -11,7 +11,7 @@ from spacy.ml import load_kb
|
||||||
from spacy.scorer import Scorer
|
from spacy.scorer import Scorer
|
||||||
from spacy.training import Example
|
from spacy.training import Example
|
||||||
from spacy.lang.en import English
|
from spacy.lang.en import English
|
||||||
from spacy.tests.util import make_tempdir, make_tempfile
|
from spacy.tests.util import make_tempdir
|
||||||
from spacy.tokens import Span
|
from spacy.tokens import Span
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -132,8 +132,8 @@ def test_incomplete_data():
|
||||||
# test the trained model
|
# test the trained model
|
||||||
test_text = "I like blue eggs"
|
test_text = "I like blue eggs"
|
||||||
doc = nlp(test_text)
|
doc = nlp(test_text)
|
||||||
assert doc[1].tag_ is "V"
|
assert doc[1].tag_ == "V"
|
||||||
assert doc[2].tag_ is "J"
|
assert doc[2].tag_ == "J"
|
||||||
|
|
||||||
|
|
||||||
def test_overfitting_IO():
|
def test_overfitting_IO():
|
||||||
|
@ -154,20 +154,20 @@ def test_overfitting_IO():
|
||||||
# test the trained model
|
# test the trained model
|
||||||
test_text = "I like blue eggs"
|
test_text = "I like blue eggs"
|
||||||
doc = nlp(test_text)
|
doc = nlp(test_text)
|
||||||
assert doc[0].tag_ is "N"
|
assert doc[0].tag_ == "N"
|
||||||
assert doc[1].tag_ is "V"
|
assert doc[1].tag_ == "V"
|
||||||
assert doc[2].tag_ is "J"
|
assert doc[2].tag_ == "J"
|
||||||
assert doc[3].tag_ is "N"
|
assert doc[3].tag_ == "N"
|
||||||
|
|
||||||
# Also test the results are still the same after IO
|
# Also test the results are still the same after IO
|
||||||
with make_tempdir() as tmp_dir:
|
with make_tempdir() as tmp_dir:
|
||||||
nlp.to_disk(tmp_dir)
|
nlp.to_disk(tmp_dir)
|
||||||
nlp2 = util.load_model_from_path(tmp_dir)
|
nlp2 = util.load_model_from_path(tmp_dir)
|
||||||
doc2 = nlp2(test_text)
|
doc2 = nlp2(test_text)
|
||||||
assert doc2[0].tag_ is "N"
|
assert doc2[0].tag_ == "N"
|
||||||
assert doc2[1].tag_ is "V"
|
assert doc2[1].tag_ == "V"
|
||||||
assert doc2[2].tag_ is "J"
|
assert doc2[2].tag_ == "J"
|
||||||
assert doc2[3].tag_ is "N"
|
assert doc2[3].tag_ == "N"
|
||||||
|
|
||||||
# Make sure that running pipe twice, or comparing to call, always amounts to the same predictions
|
# Make sure that running pipe twice, or comparing to call, always amounts to the same predictions
|
||||||
texts = [
|
texts = [
|
||||||
|
|
|
@ -2,7 +2,6 @@ import pytest
|
||||||
|
|
||||||
from spacy import registry
|
from spacy import registry
|
||||||
from spacy.language import Language
|
from spacy.language import Language
|
||||||
from spacy.pipeline import EntityRuler
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
|
|
|
@ -8,7 +8,7 @@ from spacy.vocab import Vocab
|
||||||
from spacy.training import Example
|
from spacy.training import Example
|
||||||
from spacy.lang.en import English
|
from spacy.lang.en import English
|
||||||
from spacy.lang.de import German
|
from spacy.lang.de import German
|
||||||
from spacy.util import registry, ignore_error, raise_error, logger
|
from spacy.util import registry, ignore_error, raise_error
|
||||||
import spacy
|
import spacy
|
||||||
from thinc.api import NumpyOps, get_current_ops
|
from thinc.api import NumpyOps, get_current_ops
|
||||||
|
|
||||||
|
|
|
@ -9,7 +9,7 @@ from spacy.ml._precomputable_affine import PrecomputableAffine
|
||||||
from spacy.ml._precomputable_affine import _backprop_precomputable_affine_padding
|
from spacy.ml._precomputable_affine import _backprop_precomputable_affine_padding
|
||||||
from spacy.util import dot_to_object, SimpleFrozenList, import_file
|
from spacy.util import dot_to_object, SimpleFrozenList, import_file
|
||||||
from spacy.util import to_ternary_int
|
from spacy.util import to_ternary_int
|
||||||
from thinc.api import Config, Optimizer, ConfigValidationError, get_current_ops
|
from thinc.api import Config, Optimizer, ConfigValidationError
|
||||||
from thinc.api import set_current_ops
|
from thinc.api import set_current_ops
|
||||||
from spacy.training.batchers import minibatch_by_words
|
from spacy.training.batchers import minibatch_by_words
|
||||||
from spacy.lang.en import English
|
from spacy.lang.en import English
|
||||||
|
|
|
@ -209,10 +209,6 @@ def test_tokenizer_flush_specials(en_vocab):
|
||||||
suffix_search=suffix_re.search,
|
suffix_search=suffix_re.search,
|
||||||
rules=rules,
|
rules=rules,
|
||||||
)
|
)
|
||||||
tokenizer2 = Tokenizer(
|
|
||||||
en_vocab,
|
|
||||||
suffix_search=suffix_re.search,
|
|
||||||
)
|
|
||||||
assert [t.text for t in tokenizer1("a a.")] == ["a a", "."]
|
assert [t.text for t in tokenizer1("a a.")] == ["a a", "."]
|
||||||
tokenizer1.rules = {}
|
tokenizer1.rules = {}
|
||||||
assert [t.text for t in tokenizer1("a a.")] == ["a", "a", "."]
|
assert [t.text for t in tokenizer1("a a.")] == ["a", "a", "."]
|
||||||
|
|
|
@ -110,7 +110,8 @@ def wandb_logger(
|
||||||
):
|
):
|
||||||
try:
|
try:
|
||||||
import wandb
|
import wandb
|
||||||
from wandb import init, log, join # test that these are available
|
# test that these are available
|
||||||
|
from wandb import init, log, join # noqa: F401
|
||||||
except ImportError:
|
except ImportError:
|
||||||
raise ImportError(Errors.E880)
|
raise ImportError(Errors.E880)
|
||||||
|
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
from typing import List, Callable, Tuple, Dict, Iterable, Iterator, Union, Any, IO
|
from typing import List, Callable, Tuple, Dict, Iterable, Union, Any, IO
|
||||||
from typing import Optional, TYPE_CHECKING
|
from typing import Optional, TYPE_CHECKING
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from timeit import default_timer as timer
|
from timeit import default_timer as timer
|
||||||
|
|
Loading…
Reference in New Issue
Block a user