Mirror of https://github.com/explosion/spaCy.git (synced 2025-01-26 01:04:34 +03:00)

Commit 1114219ae3 (parent 9d32cac736): Tidy up and auto-format
@@ -6,7 +6,6 @@ from wasabi import msg
 import srsly
 import hashlib
 import typer
-import subprocess
 from click import NoSuchOption
 from typer.main import get_command
 from contextlib import contextmanager
@@ -327,7 +326,7 @@ def git_checkout(
         )
     with make_tempdir() as tmp_dir:
         cmd = f"git -C {tmp_dir} clone {repo} . -b {branch}"
-        ret = run_command(cmd, capture=True)
+        run_command(cmd, capture=True)
         # We need Path(name) to make sure we also support subdirectories
         shutil.copytree(str(tmp_dir / Path(subpath)), str(dest))

@@ -156,11 +156,7 @@ class Language:
             raise ValueError(Errors.E918.format(vocab=vocab, vocab_type=type(Vocab)))
         if vocab is True:
             vectors_name = meta.get("vectors", {}).get("name")
-            vocab = create_vocab(
-                self.lang,
-                self.Defaults,
-                vectors_name=vectors_name,
-            )
+            vocab = create_vocab(self.lang, self.Defaults, vectors_name=vectors_name)
         else:
             if (self.lang and vocab.lang) and (self.lang != vocab.lang):
                 raise ValueError(Errors.E150.format(nlp=self.lang, vocab=vocab.lang))
@@ -1462,7 +1458,7 @@ class Language:
         # here :(
         for i, (name1, proc1) in enumerate(self.pipeline):
             if hasattr(proc1, "find_listeners"):
-                for name2, proc2 in self.pipeline[i+1:]:
+                for name2, proc2 in self.pipeline[i + 1 :]:
                     if isinstance(getattr(proc2, "model", None), Model):
                         proc1.find_listeners(proc2.model)

@@ -164,7 +164,9 @@ def MultiHashEmbed(


 @registry.architectures.register("spacy.CharacterEmbed.v1")
-def CharacterEmbed(width: int, rows: int, nM: int, nC: int, also_use_static_vectors: bool):
+def CharacterEmbed(
+    width: int, rows: int, nM: int, nC: int, also_use_static_vectors: bool
+):
     """Construct an embedded representation based on character embeddings, using
     a feed-forward network. A fixed number of UTF-8 byte characters are used for
     each word, taken from the beginning and end of the word equally. Padding is
@@ -202,9 +204,11 @@ def CharacterEmbed(width: int, rows: int, nM: int, nC: int, also_use_static_vect
                 ),
                 StaticVectors(width, dropout=0.0),
             ),
-            with_array(Maxout(width, nM * nC + (2 * width), nP=3, normalize=True, dropout=0.0)),
+            with_array(
+                Maxout(width, nM * nC + (2 * width), nP=3, normalize=True, dropout=0.0)
+            ),
             ragged2list(),
         )
     else:
         model = chain(
             concatenate(
@@ -215,9 +219,11 @@ def CharacterEmbed(width: int, rows: int, nM: int, nC: int, also_use_static_vect
                     with_array(HashEmbed(nO=width, nV=rows, column=0, seed=5)),
                 ),
             ),
-            with_array(Maxout(width, nM * nC + width, nP=3, normalize=True, dropout=0.0)),
+            with_array(
+                Maxout(width, nM * nC + width, nP=3, normalize=True, dropout=0.0)
+            ),
             ragged2list(),
         )
     return model


@@ -1,4 +1,4 @@
-from typing import Dict, List, Union, Optional, Sequence, Any, Callable, Type, Tuple
+from typing import Dict, List, Union, Optional, Any, Callable, Type, Tuple
 from typing import Iterable, TypeVar, TYPE_CHECKING
 from enum import Enum
 from pydantic import BaseModel, Field, ValidationError, validator
@@ -92,7 +92,12 @@ def test_spans_span_sent(doc, doc_not_parsed):
 def test_spans_lca_matrix(en_tokenizer):
     """Test span's lca matrix generation"""
     tokens = en_tokenizer("the lazy dog slept")
-    doc = get_doc(tokens.vocab, words=[t.text for t in tokens], heads=[2, 1, 1, 0], deps=["dep"] * 4)
+    doc = get_doc(
+        tokens.vocab,
+        words=[t.text for t in tokens],
+        heads=[2, 1, 1, 0],
+        deps=["dep"] * 4,
+    )
     lca = doc[:2].get_lca_matrix()
     assert lca.shape == (2, 2)
     assert lca[0, 0] == 0  # the & the -> the
@@ -63,7 +63,12 @@ def test_parser_parse_navigate_consistency(en_tokenizer, text, heads):

 def test_parser_parse_navigate_child_consistency(en_tokenizer, text, heads):
     tokens = en_tokenizer(text)
-    doc = get_doc(tokens.vocab, words=[t.text for t in tokens], heads=heads, deps=["dep"] * len(heads))
+    doc = get_doc(
+        tokens.vocab,
+        words=[t.text for t in tokens],
+        heads=heads,
+        deps=["dep"] * len(heads),
+    )

     lefts = {}
     rights = {}
@@ -345,10 +345,7 @@ def test_language_factories_invalid():
             [{"a": 100, "b": 400}, {"c": 0.5, "d": 0.5}],
             {"a": 0.1, "b": 0.4, "c": 0.25, "d": 0.25},
         ),
-        (
-            [{"a": 0.5, "b": 0.5}, {"b": 1.0}],
-            {"a": 0.25, "b": 0.75},
-        ),
+        ([{"a": 0.5, "b": 0.5}, {"b": 1.0}], {"a": 0.25, "b": 0.75},),
     ],
 )
 def test_language_factories_combine_score_weights(weights, expected):
@@ -363,16 +360,10 @@ def test_language_factories_scores():
     weights1 = {"a1": 0.5, "a2": 0.5}
     weights2 = {"b1": 0.2, "b2": 0.7, "b3": 0.1}
     Language.factory(
-        f"{name}1",
-        scores=list(weights1),
-        default_score_weights=weights1,
-        func=func,
+        f"{name}1", scores=list(weights1), default_score_weights=weights1, func=func,
     )
     Language.factory(
-        f"{name}2",
-        scores=list(weights2),
-        default_score_weights=weights2,
-        func=func,
+        f"{name}2", scores=list(weights2), default_score_weights=weights2, func=func,
     )
     meta1 = Language.get_factory_meta(f"{name}1")
     assert meta1.default_score_weights == weights1
@@ -212,9 +212,17 @@ def test_issue1834():
         heads=[0, -1, -2, -3, -4, -5, 0, -1, -2],
         deps=["dep"] * len(words),
     )
-    print(doc.has_annotation("DEP"), [t.head.i for t in doc], [t.is_sent_start for t in doc])
+    print(
+        doc.has_annotation("DEP"),
+        [t.head.i for t in doc],
+        [t.is_sent_start for t in doc],
+    )
     new_doc = Doc(doc.vocab).from_bytes(doc.to_bytes())
-    print(new_doc.has_annotation("DEP"), [t.head.i for t in new_doc], [t.is_sent_start for t in new_doc])
+    print(
+        new_doc.has_annotation("DEP"),
+        [t.head.i for t in new_doc],
+        [t.is_sent_start for t in new_doc],
+    )
     assert new_doc[6].sent_start
     assert new_doc.has_annotation("DEP")
     assert new_doc.has_annotation("TAG")
@@ -136,7 +136,13 @@ def test_serialize_textcat_empty(en_vocab):
     # See issue #1105
     cfg = {"model": DEFAULT_TEXTCAT_MODEL}
     model = registry.make_from_config(cfg, validate=True)["model"]
-    textcat = TextCategorizer(en_vocab, model, labels=["ENTITY", "ACTION", "MODIFIER"], threshold=0.5, positive_label=None)
+    textcat = TextCategorizer(
+        en_vocab,
+        model,
+        labels=["ENTITY", "ACTION", "MODIFIER"],
+        threshold=0.5,
+        positive_label=None,
+    )
     textcat.to_bytes(exclude=["vocab"])


@@ -3,7 +3,6 @@ from click import NoSuchOption

 from spacy.training import docs_to_json, biluo_tags_from_offsets
 from spacy.training.converters import iob2docs, conll_ner2docs, conllu2docs
-from spacy.lang.en import English
 from spacy.schemas import ProjectConfigSchema, RecommendationSchema, validate
 from spacy.cli.init_config import init_config, RECOMMENDATIONS
 from spacy.cli._util import validate_project_commands, parse_config_overrides
@@ -291,8 +291,7 @@ def test_spacy_blank():


 @pytest.mark.parametrize(
-    "value",
-    [False, None, ["x", "y"], Language, Vocab],
+    "value", [False, None, ["x", "y"], Language, Vocab],
 )
 def test_language_init_invalid_vocab(value):
     err_fragment = "invalid value"
@@ -95,7 +95,7 @@ def test_util_dot_section():
     assert en_nlp.get_pipe("textcat").model.attrs["multi_label"] is False
     # Test that default values got overwritten
     assert en_config["nlp"]["pipeline"] == ["textcat"]
     assert nl_config["nlp"]["pipeline"] == []  # default value []
     # Test proper functioning of 'dot_to_object'
     with pytest.raises(KeyError):
         dot_to_object(en_config, "nlp.pipeline.tagger")
@@ -1,7 +1,6 @@
 from typing import Dict, Iterable, Callable
 import pytest
 from thinc.api import Config

 from spacy import Language
 from spacy.util import load_model_from_config, registry, dot_to_object
 from spacy.training import Example
@@ -34,7 +34,17 @@ def doc():
     # fmt: on
     nlp = English()
     words = [t.text for t in nlp.make_doc(text)]
-    doc = get_doc(nlp.vocab, words=words, tags=tags, pos=pos, morphs=morphs, heads=heads, deps=deps, lemmas=lemmas, ents=ents)
+    doc = get_doc(
+        nlp.vocab,
+        words=words,
+        tags=tags,
+        pos=pos,
+        morphs=morphs,
+        heads=heads,
+        deps=deps,
+        lemmas=lemmas,
+        ents=ents,
+    )
     doc.cats = cats
     return doc
