Tidy up and auto-format

Ines Montani 2020-08-09 22:36:23 +02:00
parent 05dcab10aa
commit 3eaeb73342
15 changed files with 32 additions and 40 deletions

View File

@@ -1,4 +1,4 @@
-from typing import Union, Iterator, Iterable, Sequence, TypeVar, List, Callable
+from typing import Union, Iterable, Sequence, TypeVar, List, Callable
 from typing import Optional, Any
 from functools import partial
 import itertools
@@ -20,7 +20,7 @@ def configure_minibatch_by_padded_size(
     get_length: Optional[Callable[[ItemT], int]] = None
 ) -> BatcherT:
     """Create a batcher that uses the `batch_by_padded_size` strategy.

     The padded size is defined as the maximum length of sequences within the
     batch multiplied by the number of sequences in the batch.
@@ -92,7 +92,7 @@ def minibatch_by_padded_size(
 ) -> Iterable[List[ItemT]]:
     """Minibatch a sequence by the size of padded batches that would result,
     with sequences binned by length within a window.

     The padded size is defined as the maximum length of sequences within the
     batch multiplied by the number of sequences in the batch.
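
The padded-size arithmetic in these docstrings is easy to check by hand. A minimal sketch in plain Python (the sequences are made up; this is not the spaCy implementation):

# Padded size = length of the longest sequence in the batch multiplied by
# the number of sequences, i.e. the area of the rectangle after padding.
batch = [[1, 2, 3], [4, 5], [6]]
max_len = max(len(seq) for seq in batch)  # 3
padded_size = max_len * len(batch)        # 3 * 3 = 9 cells, 3 of them padding
print(padded_size)
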
@@ -123,7 +123,11 @@ def minibatch_by_padded_size(
 def minibatch_by_words(
-    seqs: Iterable[ItemT], size: Sizing, tolerance=0.2, discard_oversize=False, get_length=len
+    seqs: Iterable[ItemT],
+    size: Sizing,
+    tolerance=0.2,
+    discard_oversize=False,
+    get_length=len,
 ) -> Iterable[List[ItemT]]:
     """Create minibatches of roughly a given number of words. If any examples
     are longer than the specified batch length, they will appear in a batch by
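
A hedged usage sketch of the reformatted signature above. The import path is an assumption and may differ on this branch; the input sequences are invented.

from spacy.util import minibatch_by_words  # location assumed for illustration

seqs = ["a short one".split(), "a somewhat longer example sentence".split(), ["tiny"]]
# Target roughly 6 words per batch; with discard_oversize=False, an oversized
# sequence is emitted in a batch by itself rather than being dropped.
for batch in minibatch_by_words(seqs, size=6, tolerance=0.2, discard_oversize=False):
    print([len(s) for s in batch])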

View File

@ -1,5 +1,3 @@
from typing import Optional
from ...pipeline import Lemmatizer from ...pipeline import Lemmatizer
from ...tokens import Token from ...tokens import Token

View File

@@ -27,7 +27,6 @@ from .lang.tokenizer_exceptions import URL_MATCH, BASE_EXCEPTIONS
 from .lang.punctuation import TOKENIZER_PREFIXES, TOKENIZER_SUFFIXES
 from .lang.punctuation import TOKENIZER_INFIXES
 from .tokens import Doc
-from .lookups import load_lookups
 from .tokenizer import Tokenizer
 from .errors import Errors, Warnings
 from .schemas import ConfigSchema
@@ -1439,10 +1438,7 @@ class Language:
             or lang_cls is not cls
         ):
             raise ValueError(Errors.E943.format(value=type(lang_cls)))
-        nlp = lang_cls(
-            vocab=vocab,
-            create_tokenizer=create_tokenizer,
-        )
+        nlp = lang_cls(vocab=vocab, create_tokenizer=create_tokenizer)
         if after_creation is not None:
             nlp = after_creation(nlp)
         if not isinstance(nlp, cls):

View File

@@ -34,11 +34,11 @@ def make_tok2vec(nlp: Language, name: str, model: Model) -> "Tok2Vec":
 class Tok2Vec(Pipe):
     """Apply a "token-to-vector" model and set its outputs in the doc.tensor
     attribute. This is mostly useful to share a single subnetwork between multiple
     components, e.g. to have one embedding and CNN network shared between a
     parser, tagger and NER.

     In order to use the `Tok2Vec` predictions, subsequent components should use
     the `Tok2VecListener` layer as the tok2vec subnetwork of their model. This
     layer will read data from the `doc.tensor` attribute during prediction.
     During training, the `Tok2Vec` component will save its prediction and backprop
     callback for each batch, so that the subsequent components can backpropagate
@@ -46,6 +46,7 @@ class Tok2Vec(Pipe):
     avoid relying on object identity within the models to achieve the parameter
     sharing.
     """
+
     def __init__(self, vocab: Vocab, model: Model, name: str = "tok2vec") -> None:
         """Initialize a tok2vec component.
@@ -239,6 +240,7 @@ class Tok2VecListener(Model):
     from the Tok2Vec component into downstream components, and communicating
     gradients back upstream.
     """
+
     name = "tok2vec-listener"

     def __init__(self, upstream_name: str, width: int) -> None:
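
The two docstrings above describe a one-to-many sharing pattern: the Tok2Vec component runs its network once per batch and broadcasts the outputs, and each Tok2VecListener in a downstream model reads the cached result (doc.tensor at prediction time, the saved batch outputs and backprop callback during training) instead of recomputing it. A toy sketch of that pattern, with all names invented and no claim to match spaCy's implementation:

class Upstream:
    def __init__(self):
        self.listeners = []

    def predict(self, docs):
        outputs = [f"vector({doc})" for doc in docs]  # stand-in for real tensors
        for listener in self.listeners:
            listener.receive(docs, outputs)           # one forward pass, shared
        return outputs

class Listener:
    def __init__(self, upstream):
        upstream.listeners.append(self)
        self.cached = {}

    def receive(self, docs, outputs):
        self.cached = dict(zip(docs, outputs))

    def __call__(self, doc):
        return self.cached[doc]                       # no recomputation downstream

upstream = Upstream()
listener = Listener(upstream)
upstream.predict(["I live in New York"])
print(listener("I live in New York"))                 # vector(I live in New York)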

View File

@@ -426,7 +426,7 @@ class Scorer:
             f"{attr}_auc_per_type": {k: v.score for k, v in auc_per_type.items()},
         }
         if len(labels) == 2 and not multi_label and positive_label:
-            positive_label_f = results[f"{attr}_f_per_type"][positive_label]['f']
+            positive_label_f = results[f"{attr}_f_per_type"][positive_label]["f"]
             results[f"{attr}_score"] = positive_label_f
             results[f"{attr}_score_desc"] = f"F ({positive_label})"
         elif not multi_label:
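
For the two-label, single-label case handled in this branch, the overall score collapses to the F-score of the positive label. A worked example with made-up numbers:

attr = "cats"
positive_label = "POSITIVE"
results = {f"{attr}_f_per_type": {"POSITIVE": {"f": 0.9}, "NEGATIVE": {"f": 0.7}}}
results[f"{attr}_score"] = results[f"{attr}_f_per_type"][positive_label]["f"]
results[f"{attr}_score_desc"] = f"F ({positive_label})"
print(results["cats_score"], results["cats_score_desc"])  # 0.9 F (POSITIVE)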

View File

@@ -15,5 +15,7 @@ def morphology():
 def test_morphology_pickle_roundtrip(morphology):
     b = pickle.dumps(morphology)
     reloaded_morphology = pickle.loads(b)
-    assert reloaded_morphology.get(morphology.strings["Feat1=Val1|Feat2=Val2"]) == "Feat1=Val1|Feat2=Val2"
-    assert reloaded_morphology.get(morphology.strings["Feat3=Val3|Feat4=Val4"]) == "Feat3=Val3|Feat4=Val4"
+    feat = reloaded_morphology.get(morphology.strings["Feat1=Val1|Feat2=Val2"])
+    assert feat == "Feat1=Val1|Feat2=Val2"
+    feat = reloaded_morphology.get(morphology.strings["Feat3=Val3|Feat4=Val4"])
+    assert feat == "Feat3=Val3|Feat4=Val4"

View File

@@ -144,8 +144,7 @@ def test_accept_blocked_token():
     # 1. test normal behaviour
     nlp1 = English()
     doc1 = nlp1("I live in New York")
-    config = {
-    }
+    config = {}
     ner1 = nlp1.create_pipe("ner", config=config)
     assert [token.ent_iob_ for token in doc1] == ["", "", "", "", ""]
     assert [token.ent_type_ for token in doc1] == ["", "", "", "", ""]
@@ -164,8 +163,7 @@ def test_accept_blocked_token():
     # 2. test blocking behaviour
     nlp2 = English()
     doc2 = nlp2("I live in New York")
-    config = {
-    }
+    config = {}
     ner2 = nlp2.create_pipe("ner", config=config)
     # set "New York" to a blocked entity
@@ -220,8 +218,7 @@ def test_overwrite_token():
     assert [token.ent_iob_ for token in doc] == ["O", "O", "O", "O", "O"]
     assert [token.ent_type_ for token in doc] == ["", "", "", "", ""]
     # Check that a new ner can overwrite O
-    config = {
-    }
+    config = {}
     ner2 = nlp.create_pipe("ner", config=config)
     ner2.moves.add_action(5, "")
     ner2.add_label("GPE")

View File

@ -1,8 +1,7 @@
import pytest import pytest
from spacy import util, registry from spacy import util, registry
from spacy.lang.en import English from spacy.lang.en import English
from spacy.lookups import Lookups, load_lookups from spacy.lookups import Lookups
from ..util import make_tempdir from ..util import make_tempdir

View File

@@ -1,10 +1,8 @@
 import pytest
 from spacy import util
 from spacy.gold import Example
 from spacy.lang.en import English
 from spacy.language import Language
-from spacy.symbols import POS, NOUN
 from ..util import make_tempdir

View File

@@ -117,9 +117,7 @@ def test_overfitting_IO():
     assert cats2["POSITIVE"] + cats2["NEGATIVE"] == pytest.approx(1.0, 0.1)
     # Test scoring
-    scores = nlp.evaluate(
-        train_examples, scorer_cfg={"positive_label": "POSITIVE"}
-    )
+    scores = nlp.evaluate(train_examples, scorer_cfg={"positive_label": "POSITIVE"})
     assert scores["cats_micro_f"] == 1.0
     assert scores["cats_score"] == 1.0
     assert "cats_score_desc" in scores

View File

@@ -1,11 +1,9 @@
 import pytest
 import random
 from spacy import util
 from spacy.gold import Example
 from spacy.matcher import Matcher
 from spacy.attrs import IS_PUNCT, ORTH, LOWER
-from spacy.symbols import POS, VERB
 from spacy.vocab import Vocab
 from spacy.lang.en import English
 from spacy.lookups import Lookups

View File

@@ -6,8 +6,7 @@ from spacy.lang.en import English
 from spacy.lang.lex_attrs import LEX_ATTRS
 from spacy.matcher import Matcher
 from spacy.tokenizer import Tokenizer
-from spacy.lookups import Lookups
-from spacy.symbols import ORTH, LEMMA, POS, VERB
+from spacy.symbols import ORTH, LEMMA, POS


 def test_issue1061():

View File

@@ -271,8 +271,7 @@ def test_issue1963(en_tokenizer):
 @pytest.mark.parametrize("label", ["U-JOB-NAME"])
 def test_issue1967(label):
     nlp = Language()
-    config = {
-    }
+    config = {}
     ner = nlp.create_pipe("ner", config=config)
     example = Example.from_dict(
         Doc(ner.vocab, words=["word"]),

View File

@@ -157,7 +157,11 @@ def test_issue3540(en_vocab):
     with doc.retokenize() as retokenizer:
         heads = [(doc[3], 1), doc[2]]
-        attrs = {"POS": ["PROPN", "PROPN"], "LEMMA": ["New", "York"], "DEP": ["pobj", "compound"]}
+        attrs = {
+            "POS": ["PROPN", "PROPN"],
+            "LEMMA": ["New", "York"],
+            "DEP": ["pobj", "compound"],
+        }
         retokenizer.split(doc[3], ["New", "York"], heads=heads, attrs=attrs)
     gold_text = ["I", "live", "in", "New", "York", "right", "now"]

View File

@@ -138,8 +138,7 @@ def test_issue4042_bug2():
     if not output_dir.exists():
         output_dir.mkdir()
     ner1.to_disk(output_dir)
-    config = {
-    }
+    config = {}
     ner2 = nlp1.create_pipe("ner", config=config)
     ner2.from_disk(output_dir)
     assert len(ner2.labels) == 2
@@ -301,8 +300,7 @@ def test_issue4313():
     beam_width = 16
     beam_density = 0.0001
     nlp = English()
-    config = {
-    }
+    config = {}
     ner = nlp.create_pipe("ner", config=config)
     ner.add_label("SOME_LABEL")
     ner.begin_training([])