From 0c17ea4c851d2d5996447f1da8d6de2b601e5ec7 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Tue, 28 Jul 2020 22:02:34 +0200
Subject: [PATCH] Format

---
 spacy/ml/models/tok2vec.py | 32 ++++++++++++++------------------
 spacy/ml/staticvectors.py  | 14 +++++---------
 spacy/util.py              |  9 +++------
 3 files changed, 22 insertions(+), 33 deletions(-)

diff --git a/spacy/ml/models/tok2vec.py b/spacy/ml/models/tok2vec.py
index 448f9d1d0..f9183e709 100644
--- a/spacy/ml/models/tok2vec.py
+++ b/spacy/ml/models/tok2vec.py
@@ -23,7 +23,7 @@ def tok2vec_listener_v1(width, upstream="*"):
 @registry.architectures.register("spacy.Tok2Vec.v1")
 def Tok2Vec(
     embed: Model[List[Doc], List[Floats2d]],
-    encode: Model[List[Floats2d], List[Floats2d]]
+    encode: Model[List[Floats2d], List[Floats2d]],
 ) -> Model[List[Doc], List[Floats2d]]:
     receptive_field = encode.attrs.get("receptive_field", 0)
     tok2vec = chain(embed, with_array(encode, pad=receptive_field))
@@ -36,14 +36,12 @@ def Tok2Vec(
 
 @registry.architectures.register("spacy.MultiHashEmbed.v1")
 def MultiHashEmbed(
-    width: int,
-    rows: int,
-    also_embed_subwords: bool,
-    also_use_static_vectors: bool
+    width: int, rows: int, also_embed_subwords: bool, also_use_static_vectors: bool
 ):
     cols = [NORM, PREFIX, SUFFIX, SHAPE, ORTH]
-    
+
     seed = 7
+
     def make_hash_embed(feature):
         nonlocal seed
         seed += 1
@@ -52,15 +50,15 @@ def MultiHashEmbed(
             rows if feature == NORM else rows // 2,
             column=cols.index(feature),
             seed=seed,
-            dropout=0.0
+            dropout=0.0,
         )
-    
+
     if also_embed_subwords:
         embeddings = [
             make_hash_embed(NORM),
             make_hash_embed(PREFIX),
             make_hash_embed(SUFFIX),
-            make_hash_embed(SHAPE)
+            make_hash_embed(SHAPE),
         ]
     else:
         embeddings = [make_hash_embed(NORM)]
@@ -71,25 +69,25 @@
                 chain(
                     FeatureExtractor(cols),
                     list2ragged(),
-                    with_array(concatenate(*embeddings))
+                    with_array(concatenate(*embeddings)),
                 ),
-                StaticVectors(width, dropout=0.0)
+                StaticVectors(width, dropout=0.0),
             ),
             with_array(Maxout(width, nP=3, dropout=0.0, normalize=True)),
-            ragged2list()
+            ragged2list(),
         )
     else:
         model = chain(
             chain(
                 FeatureExtractor(cols),
                 list2ragged(),
-                with_array(concatenate(*embeddings))
+                with_array(concatenate(*embeddings)),
             ),
             with_array(Maxout(width, nP=3, dropout=0.0, normalize=True)),
-            ragged2list()
+            ragged2list(),
         )
     return model
-    
+
 
 @registry.architectures.register("spacy.CharacterEmbed.v1")
 def CharacterEmbed(columns, width, rows, nM, nC, features, dropout):
@@ -137,6 +135,4 @@ def MishWindowEncoder(width, window_size, depth):
 def BiLSTMEncoder(width, depth, dropout):
     if depth == 0:
         return noop()
-    return with_padded(
-        PyTorchLSTM(width, width, bi=True, depth=depth, dropout=dropout)
-    )
+    return with_padded(PyTorchLSTM(width, width, bi=True, depth=depth, dropout=dropout))
diff --git a/spacy/ml/staticvectors.py b/spacy/ml/staticvectors.py
index ce2c7efff..41afdbf80 100644
--- a/spacy/ml/staticvectors.py
+++ b/spacy/ml/staticvectors.py
@@ -15,7 +15,7 @@ def StaticVectors(
     *,
     dropout: Optional[float] = None,
     init_W: Callable = glorot_uniform_init,
-    key_attr: str="ORTH"
+    key_attr: str = "ORTH"
 ) -> Model[List[Doc], Ragged]:
     """Embed Doc objects with their vocab's vectors table, applying a learned
     linear projection to control the dimensionality. If a dropout rate is
@@ -45,21 +45,17 @@ def forward(
     )
     output = Ragged(
         model.ops.gemm(model.ops.as_contig(V[rows]), W, trans2=True),
-        model.ops.asarray([len(doc) for doc in docs], dtype="i")
+        model.ops.asarray([len(doc) for doc in docs], dtype="i"),
     )
     if mask is not None:
         output.data *= mask
-    
+
     def backprop(d_output: Ragged) -> List[Doc]:
         if mask is not None:
             d_output.data *= mask
         model.inc_grad(
             "W",
-            model.ops.gemm(
-                d_output.data,
-                model.ops.as_contig(V[rows]),
-                trans1=True
-            )
+            model.ops.gemm(d_output.data, model.ops.as_contig(V[rows]), trans1=True),
         )
         return []
 
@@ -78,7 +74,7 @@ def init(
         nM = X[0].vocab.vectors.data.shape[1]
     if Y is not None:
         nO = Y.data.shape[1]
-    
+
     if nM is None:
         raise ValueError(
             "Cannot initialize StaticVectors layer: nM dimension unset. "
diff --git a/spacy/util.py b/spacy/util.py
index 7a26011f1..898e1c2c3 100644
--- a/spacy/util.py
+++ b/spacy/util.py
@@ -190,10 +190,7 @@ def get_module_path(module: ModuleType) -> Path:
 
 
 def load_vectors_into_model(
-    nlp: "Language",
-    name: Union[str, Path],
-    *,
-    add_strings=True
+    nlp: "Language", name: Union[str, Path], *, add_strings=True
 ) -> None:
     """Load word vectors from an installed model or path into a model instance."""
     vectors_nlp = load_model(name)
@@ -1205,12 +1202,12 @@ class DummyTokenizer:
 
 def link_vectors_to_models(
     vocab: "Vocab",
-    models: List[Model]=[],
+    models: List[Model] = [],
     *,
     vectors_name_attr="vectors_name",
     vectors_attr="vectors",
     key2row_attr="key2row",
-    default_vectors_name="spacy_pretrained_vectors"
+    default_vectors_name="spacy_pretrained_vectors",
 ) -> None:
     """Supply vectors data to models."""
     vectors = vocab.vectors
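Note for readers (not part of the patch): the Tok2Vec, MultiHashEmbed, and
MishWindowEncoder functions reformatted above compose into a full
token-to-vector pipeline. Below is a minimal sketch of that composition,
assuming the spaCy v3 development API at the time of this commit; the width,
rows, window_size, and depth values are illustrative only, not defaults from
the source.

    from spacy.ml.models.tok2vec import MultiHashEmbed, MishWindowEncoder, Tok2Vec

    # Hash-based embedding of lexical attributes. With subwords enabled,
    # PREFIX/SUFFIX/SHAPE tables are built alongside NORM; static vectors
    # stay off, so only the non-vector branch of MultiHashEmbed is used.
    embed = MultiHashEmbed(
        width=96, rows=2000, also_embed_subwords=True, also_use_static_vectors=False
    )

    # Window-based contextual encoder over the embedded tokens.
    encode = MishWindowEncoder(width=96, window_size=1, depth=4)

    # Tok2Vec chains the two stages, padding the array for the encoder's
    # receptive field (see the receptive_field attr read in the diff above).
    model = Tok2Vec(embed, encode)

The embedding width and the encoder width must match, since the encoder
consumes the embedding layer's output arrays directly.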