Mirror of https://github.com/explosion/spaCy.git
Commit 1114219ae3 (parent 9d32cac736): Tidy up and auto-format
@@ -6,7 +6,6 @@ from wasabi import msg
 import srsly
 import hashlib
 import typer
-import subprocess
 from click import NoSuchOption
 from typer.main import get_command
 from contextlib import contextmanager
@@ -327,7 +326,7 @@ def git_checkout(
     )
     with make_tempdir() as tmp_dir:
         cmd = f"git -C {tmp_dir} clone {repo} . -b {branch}"
-        ret = run_command(cmd, capture=True)
+        run_command(cmd, capture=True)
         # We need Path(name) to make sure we also support subdirectories
         shutil.copytree(str(tmp_dir / Path(subpath)), str(dest))
 
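Note: the dropped `ret =` assignment was simply unused. For context, `make_tempdir` is the `spacy.util` context manager this hunk relies on; a minimal sketch of the pattern it implements (assumed, not spaCy's exact code):

from contextlib import contextmanager
from pathlib import Path
import shutil
import tempfile

@contextmanager
def make_tempdir():
    # Create a unique temporary directory and hand it to the caller as a Path.
    tmp_dir = Path(tempfile.mkdtemp())
    try:
        yield tmp_dir
    finally:
        # Clean up even if the block using the directory raises.
        shutil.rmtree(str(tmp_dir))

Because the cleanup runs in `finally`, the cloned working tree never outlives the `with` block.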
@@ -156,11 +156,7 @@ class Language:
             raise ValueError(Errors.E918.format(vocab=vocab, vocab_type=type(Vocab)))
         if vocab is True:
             vectors_name = meta.get("vectors", {}).get("name")
-            vocab = create_vocab(
-                self.lang,
-                self.Defaults,
-                vectors_name=vectors_name,
-            )
+            vocab = create_vocab(self.lang, self.Defaults, vectors_name=vectors_name)
         else:
             if (self.lang and vocab.lang) and (self.lang != vocab.lang):
                 raise ValueError(Errors.E150.format(nlp=self.lang, vocab=vocab.lang))
@@ -1462,7 +1458,7 @@ class Language:
         # here :(
         for i, (name1, proc1) in enumerate(self.pipeline):
             if hasattr(proc1, "find_listeners"):
-                for name2, proc2 in self.pipeline[i+1:]:
+                for name2, proc2 in self.pipeline[i + 1 :]:
                     if isinstance(getattr(proc2, "model", None), Model):
                         proc1.find_listeners(proc2.model)
 
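Only the slice spacing changed here (`[i+1:]` to `[i + 1 :]`, black's style for slices with complex expressions). The loop pairs every component with the components after it; a toy illustration of that traversal in plain Python (not spaCy's listener API):

pipeline = [("tok2vec", object()), ("tagger", object()), ("parser", object())]
for i, (name1, proc1) in enumerate(pipeline):
    for name2, proc2 in pipeline[i + 1 :]:
        # Each earlier component inspects every later one; this is how a
        # tok2vec-style component discovers listener layers downstream.
        print(f"{name1} checks {name2}")  # tok2vec->tagger, tok2vec->parser, tagger->parser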
@@ -164,7 +164,9 @@ def MultiHashEmbed(
 
 
 @registry.architectures.register("spacy.CharacterEmbed.v1")
-def CharacterEmbed(width: int, rows: int, nM: int, nC: int, also_use_static_vectors: bool):
+def CharacterEmbed(
+    width: int, rows: int, nM: int, nC: int, also_use_static_vectors: bool
+):
     """Construct an embedded representation based on character embeddings, using
     a feed-forward network. A fixed number of UTF-8 byte characters are used for
     each word, taken from the beginning and end of the word equally. Padding is
@@ -202,9 +204,11 @@ def CharacterEmbed(width: int, rows: int, nM: int, nC: int, also_use_static_vect
                 ),
                 StaticVectors(width, dropout=0.0),
             ),
-            with_array(Maxout(width, nM * nC + (2 * width), nP=3, normalize=True, dropout=0.0)),
+            with_array(
+                Maxout(width, nM * nC + (2 * width), nP=3, normalize=True, dropout=0.0)
+            ),
             ragged2list(),
-            )
+        )
     else:
         model = chain(
             concatenate(
@@ -215,9 +219,11 @@ def CharacterEmbed(width: int, rows: int, nM: int, nC: int, also_use_static_vect
                     with_array(HashEmbed(nO=width, nV=rows, column=0, seed=5)),
                 ),
             ),
-            with_array(Maxout(width, nM * nC + width, nP=3, normalize=True, dropout=0.0)),
+            with_array(
+                Maxout(width, nM * nC + width, nP=3, normalize=True, dropout=0.0)
+            ),
             ragged2list(),
-            )
+        )
     return model
 
 
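A hedged usage sketch for the architecture reformatted above, assuming this commit's `spacy.CharacterEmbed.v1` signature; the hyperparameter values are invented for illustration, not defaults:

from thinc.api import Model
from spacy.ml.models.tok2vec import CharacterEmbed

model: Model = CharacterEmbed(
    width=128,  # output width of the layer
    rows=7000,  # rows in the NORM hash embedding table
    nM=64,  # width of each character embedding
    nC=8,  # characters taken from the start and end of each word
    also_use_static_vectors=False,
)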
@@ -1,4 +1,4 @@
-from typing import Dict, List, Union, Optional, Sequence, Any, Callable, Type, Tuple
+from typing import Dict, List, Union, Optional, Any, Callable, Type, Tuple
 from typing import Iterable, TypeVar, TYPE_CHECKING
 from enum import Enum
 from pydantic import BaseModel, Field, ValidationError, validator
@@ -255,7 +255,7 @@ class ConfigSchemaPretrain(BaseModel):
     batcher: Batcher = Field(..., title="Batcher for the training data")
     component: str = Field(..., title="Component to find the layer to pretrain")
     layer: str = Field(..., title="Layer to pretrain. Whole model if empty.")
-
+
     # TODO: use a more detailed schema for this?
     objective: Dict[str, Any] = Field(..., title="Pretraining objective")
     # fmt: on
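The changed pair above appears to be a whitespace-only fix on the blank line. For readers new to the schema style: `Field(..., title=...)` declares a required value with a human-readable description. A minimal self-contained sketch of the same pydantic pattern (class name invented):

from typing import Any, Dict
from pydantic import BaseModel, Field

class PretrainSectionSketch(BaseModel):
    component: str = Field(..., title="Component to find the layer to pretrain")
    layer: str = Field(..., title="Layer to pretrain. Whole model if empty.")
    objective: Dict[str, Any] = Field(..., title="Pretraining objective")

# Omitting a required field fails loudly:
# PretrainSectionSketch(component="tok2vec", layer="")  # -> ValidationError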
@@ -92,7 +92,12 @@ def test_spans_span_sent(doc, doc_not_parsed):
 def test_spans_lca_matrix(en_tokenizer):
     """Test span's lca matrix generation"""
     tokens = en_tokenizer("the lazy dog slept")
-    doc = get_doc(tokens.vocab, words=[t.text for t in tokens], heads=[2, 1, 1, 0], deps=["dep"] * 4)
+    doc = get_doc(
+        tokens.vocab,
+        words=[t.text for t in tokens],
+        heads=[2, 1, 1, 0],
+        deps=["dep"] * 4,
+    )
     lca = doc[:2].get_lca_matrix()
     assert lca.shape == (2, 2)
     assert lca[0, 0] == 0  # the & the -> the
@@ -63,7 +63,12 @@ def test_parser_parse_navigate_consistency(en_tokenizer, text, heads):
 
 def test_parser_parse_navigate_child_consistency(en_tokenizer, text, heads):
     tokens = en_tokenizer(text)
-    doc = get_doc(tokens.vocab, words=[t.text for t in tokens], heads=heads, deps=["dep"] * len(heads))
+    doc = get_doc(
+        tokens.vocab,
+        words=[t.text for t in tokens],
+        heads=heads,
+        deps=["dep"] * len(heads),
+    )
 
     lefts = {}
     rights = {}
@@ -345,10 +345,7 @@ def test_language_factories_invalid():
             [{"a": 100, "b": 400}, {"c": 0.5, "d": 0.5}],
             {"a": 0.1, "b": 0.4, "c": 0.25, "d": 0.25},
         ),
-        (
-            [{"a": 0.5, "b": 0.5}, {"b": 1.0}],
-            {"a": 0.25, "b": 0.75},
-        ),
+        ([{"a": 0.5, "b": 0.5}, {"b": 1.0}], {"a": 0.25, "b": 0.75},),
     ],
 )
 def test_language_factories_combine_score_weights(weights, expected):
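The expected dicts encode the rule the test asserts: each component's score weights are normalized to sum to 1, then averaged across components. A hedged re-derivation of both parametrized cases (the real implementation is `spacy.util.combine_score_weights`, which may treat edge cases differently):

def combined(weight_dicts):
    # Normalize each component's weights to sum to 1, then average per key.
    out = {}
    for weights in weight_dicts:
        total = sum(weights.values())
        for key, value in weights.items():
            out[key] = out.get(key, 0.0) + (value / total) / len(weight_dicts)
    return out

assert combined([{"a": 100, "b": 400}, {"c": 0.5, "d": 0.5}]) == {
    "a": 0.1, "b": 0.4, "c": 0.25, "d": 0.25,
}
assert combined([{"a": 0.5, "b": 0.5}, {"b": 1.0}]) == {"a": 0.25, "b": 0.75}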
@@ -363,16 +360,10 @@ def test_language_factories_scores():
     weights1 = {"a1": 0.5, "a2": 0.5}
     weights2 = {"b1": 0.2, "b2": 0.7, "b3": 0.1}
     Language.factory(
-        f"{name}1",
-        scores=list(weights1),
-        default_score_weights=weights1,
-        func=func,
+        f"{name}1", scores=list(weights1), default_score_weights=weights1, func=func,
     )
     Language.factory(
-        f"{name}2",
-        scores=list(weights2),
-        default_score_weights=weights2,
-        func=func,
+        f"{name}2", scores=list(weights2), default_score_weights=weights2, func=func,
     )
     meta1 = Language.get_factory_meta(f"{name}1")
     assert meta1.default_score_weights == weights1
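For context, here is the reformatted registration call as a standalone, hedged sketch; the component name, weights, and no-op `func` are invented, and the `scores` argument reflects this commit's API:

from spacy.language import Language

def func(nlp, name):
    # A do-nothing component factory, standing in for the test's fixture.
    return lambda doc: doc

weights = {"my_metric": 1.0}
Language.factory(
    "my_component", scores=list(weights), default_score_weights=weights, func=func,
)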
@@ -212,9 +212,17 @@ def test_issue1834():
         heads=[0, -1, -2, -3, -4, -5, 0, -1, -2],
         deps=["dep"] * len(words),
     )
-    print(doc.has_annotation("DEP"), [t.head.i for t in doc], [t.is_sent_start for t in doc])
+    print(
+        doc.has_annotation("DEP"),
+        [t.head.i for t in doc],
+        [t.is_sent_start for t in doc],
+    )
     new_doc = Doc(doc.vocab).from_bytes(doc.to_bytes())
-    print(new_doc.has_annotation("DEP"), [t.head.i for t in new_doc], [t.is_sent_start for t in new_doc])
+    print(
+        new_doc.has_annotation("DEP"),
+        [t.head.i for t in new_doc],
+        [t.is_sent_start for t in new_doc],
+    )
     assert new_doc[6].sent_start
     assert new_doc.has_annotation("DEP")
     assert new_doc.has_annotation("TAG")
@@ -136,7 +136,13 @@ def test_serialize_textcat_empty(en_vocab):
     # See issue #1105
     cfg = {"model": DEFAULT_TEXTCAT_MODEL}
     model = registry.make_from_config(cfg, validate=True)["model"]
-    textcat = TextCategorizer(en_vocab, model, labels=["ENTITY", "ACTION", "MODIFIER"], threshold=0.5, positive_label=None)
+    textcat = TextCategorizer(
+        en_vocab,
+        model,
+        labels=["ENTITY", "ACTION", "MODIFIER"],
+        threshold=0.5,
+        positive_label=None,
+    )
     textcat.to_bytes(exclude=["vocab"])
 
 
@@ -3,7 +3,6 @@ from click import NoSuchOption
 from spacy.training import docs_to_json, biluo_tags_from_offsets
 from spacy.training.converters import iob2docs, conll_ner2docs, conllu2docs
 from spacy.lang.en import English
-
 from spacy.schemas import ProjectConfigSchema, RecommendationSchema, validate
 from spacy.cli.init_config import init_config, RECOMMENDATIONS
 from spacy.cli._util import validate_project_commands, parse_config_overrides
@@ -291,8 +291,7 @@ def test_spacy_blank():
 
 
 @pytest.mark.parametrize(
-    "value",
-    [False, None, ["x", "y"], Language, Vocab],
+    "value", [False, None, ["x", "y"], Language, Vocab],
 )
 def test_language_init_invalid_vocab(value):
     err_fragment = "invalid value"
@@ -95,7 +95,7 @@ def test_util_dot_section():
     assert en_nlp.get_pipe("textcat").model.attrs["multi_label"] is False
     # Test that default values got overwritten
     assert en_config["nlp"]["pipeline"] == ["textcat"]
-    assert nl_config["nlp"]["pipeline"] == [] # default value []
+    assert nl_config["nlp"]["pipeline"] == []  # default value []
     # Test proper functioning of 'dot_to_object'
     with pytest.raises(KeyError):
         dot_to_object(en_config, "nlp.pipeline.tagger")
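The changed assert above differs only in the spacing before the inline comment. For context, `dot_to_object` resolves a dotted path against a nested config dict and raises `KeyError` for paths that do not exist, which is exactly what the test checks:

from spacy.util import dot_to_object

config = {"nlp": {"lang": "en", "pipeline": ["textcat"]}}
assert dot_to_object(config, "nlp.pipeline") == ["textcat"]
# "nlp.pipeline.tagger" would raise KeyError: list items are not addressable by name.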
@@ -1,7 +1,6 @@
 from typing import Dict, Iterable, Callable
 import pytest
 from thinc.api import Config
-
 from spacy import Language
 from spacy.util import load_model_from_config, registry, dot_to_object
 from spacy.training import Example
@@ -10,19 +9,19 @@ from spacy.training import Example
 def test_readers():
     config_string = """
     [training]
-
+
     [corpora]
     @readers = "myreader.v1"
 
     [nlp]
     lang = "en"
     pipeline = ["tok2vec", "textcat"]
-
+
     [components]
-
+
     [components.tok2vec]
     factory = "tok2vec"
-
+
     [components.textcat]
     factory = "textcat"
     """
@@ -69,19 +68,19 @@ def test_readers():
 def test_cat_readers(reader, additional_config):
     nlp_config_string = """
     [training]
-
+
     [corpora]
     @readers = "PLACEHOLDER"
 
     [nlp]
     lang = "en"
     pipeline = ["tok2vec", "textcat"]
-
+
     [components]
-
+
     [components.tok2vec]
     factory = "tok2vec"
-
+
     [components.textcat]
     factory = "textcat"
     """
@@ -34,7 +34,17 @@ def doc():
     # fmt: on
     nlp = English()
     words = [t.text for t in nlp.make_doc(text)]
-    doc = get_doc(nlp.vocab, words=words, tags=tags, pos=pos, morphs=morphs, heads=heads, deps=deps, lemmas=lemmas, ents=ents)
+    doc = get_doc(
+        nlp.vocab,
+        words=words,
+        tags=tags,
+        pos=pos,
+        morphs=morphs,
+        heads=heads,
+        deps=deps,
+        lemmas=lemmas,
+        ents=ents,
+    )
     doc.cats = cats
     return doc
 