Tidy up and auto-format

2025-10-09 05:16:50 +03:00 · 2019-10-28 12:43:55 +01:00 · 2019-10-28 12:43:55 +01:00 · c5e41247e8
commit c5e41247e8
parent 92018b9cd4
9 changed files with 41 additions and 47 deletions
--- a/spacy/_ml.py
+++ b/spacy/_ml.py
@@ -307,6 +307,7 @@ def PyTorchBiLSTM(nO, nI, depth, dropout=0.2):
    import torch.nn
    from thinc.api import with_square_sequences
    from thinc.extra.wrappers import PyTorchWrapperRNN
+
    if depth == 0:
        return layerize(noop())
    model = torch.nn.LSTM(nI, nO // 2, depth, bidirectional=True, dropout=dropout)
@@ -322,7 +323,7 @@ def Tok2Vec(width, embed_size, **kwargs):
    bilstm_depth = kwargs.get("bilstm_depth", 0)

    cols = ["ID", "NORM", "PREFIX", "SUFFIX", "SHAPE", "ORTH"]
- 
+
    doc2feats_cfg = {"arch": "spacy.Doc2Feats.v1", "config": {"columns": cols}}
    if char_embed:
        embed_cfg = {
@@ -332,13 +333,10 @@ def Tok2Vec(width, embed_size, **kwargs):
                "chars": 6,
                "@mix": {
                    "arch": "spacy.LayerNormalizedMaxout.v1",
-                    "config": {
-                        "width": width,
-                        "pieces": 3
-                    }
+                    "config": {"width": width, "pieces": 3},
                },
-                "@embed_features": None
-            }
+                "@embed_features": None,
+            },
        }
    else:
        embed_cfg = {
@@ -351,12 +349,9 @@ def Tok2Vec(width, embed_size, **kwargs):
                "@pretrained_vectors": None,
                "@mix": {
                    "arch": "spacy.LayerNormalizedMaxout.v1",
-                    "config": {
-                        "width": width,
-                        "pieces": 3
-                    }
+                    "config": {"width": width, "pieces": 3},
                },
-            }
+            },
        }
        if pretrained_vectors:
            embed_cfg["config"]["@pretrained_vectors"] = {
@@ -364,8 +359,8 @@ def Tok2Vec(width, embed_size, **kwargs):
                "config": {
                    "vectors_name": pretrained_vectors,
                    "width": width,
-                    "column": cols.index(ID)
-                }
+                    "column": cols.index(ID),
+                },
            }
    cnn_cfg = {
        "arch": "spacy.MaxoutWindowEncoder.v1",
@@ -373,35 +368,26 @@ def Tok2Vec(width, embed_size, **kwargs):
            "width": width,
            "window_size": 1,
            "pieces": cnn_maxout_pieces,
-            "depth": conv_depth
-        }
+            "depth": conv_depth,
+        },
    }

    bilstm_cfg = {
        "arch": "spacy.TorchBiLSTMEncoder.v1",
-        "config": {
-            "width": width,
-            "depth": bilstm_depth,
-        }
+        "config": {"width": width, "depth": bilstm_depth},
    }
    if conv_depth == 0 and bilstm_depth == 0:
        encode_cfg = {}
    elif conv_depth >= 1 and bilstm_depth >= 1:
        encode_cfg = {
            "arch": "thinc.FeedForward.v1",
-            "config": {
-                "children": [cnn_cfg, bilstm_cfg]
-            }
+            "config": {"children": [cnn_cfg, bilstm_cfg]},
        }
    elif conv_depth >= 1:
        encode_cfg = cnn_cfg
    else:
        encode_cfg = bilstm_cfg
-    config = {
-        "@doc2feats": doc2feats_cfg,
-        "@embed": embed_cfg,
-        "@encode": encode_cfg
-    }
+    config = {"@doc2feats": doc2feats_cfg, "@embed": embed_cfg, "@encode": encode_cfg}
    return new_ml.Tok2Vec(config)


--- a/spacy/cli/train.py
+++ b/spacy/cli/train.py
@@ -262,8 +262,11 @@ def train(
                exits=1,
            )
        train_docs = corpus.train_docs(
-            nlp, noise_level=noise_level, gold_preproc=gold_preproc, max_length=0,
-            ignore_misaligned=True
+            nlp,
+            noise_level=noise_level,
+            gold_preproc=gold_preproc,
+            max_length=0,
+            ignore_misaligned=True,
        )
        train_labels = set()
        if textcat_multilabel:
@@ -344,7 +347,7 @@ def train(
                orth_variant_level=orth_variant_level,
                gold_preproc=gold_preproc,
                max_length=0,
-                ignore_misaligned=True
+                ignore_misaligned=True,
            )
            if raw_text:
                random.shuffle(raw_text)
@@ -383,8 +386,11 @@ def train(
                        if hasattr(component, "cfg"):
                            component.cfg["beam_width"] = beam_width
                    dev_docs = list(
-                        corpus.dev_docs(nlp_loaded, gold_preproc=gold_preproc,
-                                        ignore_misaligned=True)
+                        corpus.dev_docs(
+                            nlp_loaded,
+                            gold_preproc=gold_preproc,
+                            ignore_misaligned=True,
+                        )
                    )
                    nwords = sum(len(doc_gold[0]) for doc_gold in dev_docs)
                    start_time = timer()
@@ -401,8 +407,11 @@ def train(
                                if hasattr(component, "cfg"):
                                    component.cfg["beam_width"] = beam_width
                            dev_docs = list(
-                                corpus.dev_docs(nlp_loaded, gold_preproc=gold_preproc,
-                                                ignore_misaligned=True)
+                                corpus.dev_docs(
+                                    nlp_loaded,
+                                    gold_preproc=gold_preproc,
+                                    ignore_misaligned=True,
+                                )
                            )
                            start_time = timer()
                            scorer = nlp_loaded.evaluate(dev_docs, verbose=verbose)
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -131,9 +131,7 @@ class Language(object):
    Defaults = BaseDefaults
    lang = None

-    factories = {
-        "tokenizer": lambda nlp: nlp.Defaults.create_tokenizer(nlp),
-    }
+    factories = {"tokenizer": lambda nlp: nlp.Defaults.create_tokenizer(nlp)}

    def __init__(
        self, vocab=True, make_doc=True, max_length=10 ** 6, meta={}, **kwargs
--- a/spacy/ml/__init__.py
+++ b/spacy/ml/__init__.py
@@ -1 +1,5 @@
-from .tok2vec import Tok2Vec
+# coding: utf8
+from __future__ import unicode_literals
+
+from .tok2vec import Tok2Vec  # noqa: F401
+from .common import FeedForward, LayerNormalizedMaxout  # noqa: F401
--- a/spacy/ml/common.py
+++ b/spacy/ml/common.py
@@ -13,6 +13,7 @@ def FeedForward(config):
    model.cfg = config
    return model

+
@register_architecture("spacy.LayerNormalizedMaxout.v1")
 def LayerNormalizedMaxout(config):
    width = config["width"]
--- a/spacy/ml/tok2vec.py
+++ b/spacy/ml/tok2vec.py
@@ -9,7 +9,6 @@ from thinc.misc import Residual, LayerNorm, FeatureExtracter

 from ..util import make_layer, register_architecture
 from ._wire import concatenate_lists
-from .common import *


@register_architecture("spacy.Tok2Vec.v1")
@@ -81,8 +80,7 @@ def MaxoutWindowEncoder(config):
    depth = config["depth"]

    cnn = chain(
-        ExtractWindow(nW=nW),
-        LayerNorm(Maxout(nO, nO * ((nW * 2) + 1), pieces=nP)),
+        ExtractWindow(nW=nW), LayerNorm(Maxout(nO, nO * ((nW * 2) + 1), pieces=nP))
    )
    model = clone(Residual(cnn), depth)
    model.nO = nO
--- a/spacy/tests/regression/test_issue4402.py
+++ b/spacy/tests/regression/test_issue4402.py
@@ -2,7 +2,7 @@
 from __future__ import unicode_literals

 import srsly
-from spacy.gold import GoldCorpus, json_to_tuple
+from spacy.gold import GoldCorpus

 from spacy.lang.en import English
 from spacy.tests.util import make_tempdir
@@ -94,4 +94,3 @@ json_data = [
        ],
    }
 ]
-
--- a/spacy/tests/test_gold.py
+++ b/spacy/tests/test_gold.py
@@ -205,4 +205,3 @@ def test_align(tokens_a, tokens_b, expected):
    # check symmetry
    cost, a2b, b2a, a2b_multi, b2a_multi = align(tokens_b, tokens_a)
    assert (cost, list(b2a), list(a2b), b2a_multi, a2b_multi) == expected
-
--- a/spacy/tests/test_misc.py
+++ b/spacy/tests/test_misc.py
@@ -96,14 +96,14 @@ def test_PrecomputableAffine(nO=4, nI=5, nF=3, nP=2):

 def test_prefer_gpu():
    try:
-        import cupy
+        import cupy  # noqa: F401
    except ImportError:
        assert not prefer_gpu()


 def test_require_gpu():
    try:
-        import cupy
+        import cupy  # noqa: F401
    except ImportError:
        with pytest.raises(ValueError):
            require_gpu()