Tidy up and auto-format

Ines Montani 2019-10-28 12:43:55 +01:00
parent 92018b9cd4
commit c5e41247e8
9 changed files with 41 additions and 47 deletions

View File

@@ -307,6 +307,7 @@ def PyTorchBiLSTM(nO, nI, depth, dropout=0.2):
import torch.nn
from thinc.api import with_square_sequences
from thinc.extra.wrappers import PyTorchWrapperRNN
if depth == 0:
return layerize(noop())
model = torch.nn.LSTM(nI, nO // 2, depth, bidirectional=True, dropout=dropout)
@@ -322,7 +323,7 @@ def Tok2Vec(width, embed_size, **kwargs):
bilstm_depth = kwargs.get("bilstm_depth", 0)
cols = ["ID", "NORM", "PREFIX", "SUFFIX", "SHAPE", "ORTH"]
doc2feats_cfg = {"arch": "spacy.Doc2Feats.v1", "config": {"columns": cols}}
if char_embed:
embed_cfg = {
@@ -332,13 +333,10 @@ def Tok2Vec(width, embed_size, **kwargs):
"chars": 6,
"@mix": {
"arch": "spacy.LayerNormalizedMaxout.v1",
"config": {
"width": width,
"pieces": 3
}
"config": {"width": width, "pieces": 3},
},
"@embed_features": None
}
"@embed_features": None,
},
}
else:
embed_cfg = {
@@ -351,12 +349,9 @@ def Tok2Vec(width, embed_size, **kwargs):
"@pretrained_vectors": None,
"@mix": {
"arch": "spacy.LayerNormalizedMaxout.v1",
"config": {
"width": width,
"pieces": 3
}
"config": {"width": width, "pieces": 3},
},
}
},
}
if pretrained_vectors:
embed_cfg["config"]["@pretrained_vectors"] = {
@@ -364,8 +359,8 @@ def Tok2Vec(width, embed_size, **kwargs):
"config": {
"vectors_name": pretrained_vectors,
"width": width,
"column": cols.index(ID)
}
"column": cols.index(ID),
},
}
cnn_cfg = {
"arch": "spacy.MaxoutWindowEncoder.v1",
@@ -373,35 +368,26 @@ def Tok2Vec(width, embed_size, **kwargs):
"width": width,
"window_size": 1,
"pieces": cnn_maxout_pieces,
"depth": conv_depth
}
"depth": conv_depth,
},
}
bilstm_cfg = {
"arch": "spacy.TorchBiLSTMEncoder.v1",
"config": {
"width": width,
"depth": bilstm_depth,
}
"config": {"width": width, "depth": bilstm_depth},
}
if conv_depth == 0 and bilstm_depth == 0:
encode_cfg = {}
elif conv_depth >= 1 and bilstm_depth >= 1:
encode_cfg = {
"arch": "thinc.FeedForward.v1",
"config": {
"children": [cnn_cfg, bilstm_cfg]
}
"config": {"children": [cnn_cfg, bilstm_cfg]},
}
elif conv_depth >= 1:
encode_cfg = cnn_cfg
else:
encode_cfg = bilstm_cfg
config = {
"@doc2feats": doc2feats_cfg,
"@embed": embed_cfg,
"@encode": encode_cfg
}
config = {"@doc2feats": doc2feats_cfg, "@embed": embed_cfg, "@encode": encode_cfg}
return new_ml.Tok2Vec(config)

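For context on the config being tidied above: Tok2Vec assembles nested architecture descriptions and passes the result to new_ml.Tok2Vec. A rough sketch of the shape that dict ends up with, using illustrative values and an assumed embed architecture name (neither is shown in this diff):

config = {
    "@doc2feats": {
        "arch": "spacy.Doc2Feats.v1",
        "config": {"columns": ["ID", "NORM", "PREFIX", "SUFFIX", "SHAPE", "ORTH"]},
    },
    "@embed": {
        "arch": "spacy.MultiHashEmbed.v1",  # assumed name, not visible in this hunk
        "config": {
            "width": 96,    # illustrative value
            "rows": 2000,   # illustrative value
            "columns": ["ID", "NORM", "PREFIX", "SUFFIX", "SHAPE", "ORTH"],
            "@pretrained_vectors": None,
            "@mix": {
                "arch": "spacy.LayerNormalizedMaxout.v1",
                "config": {"width": 96, "pieces": 3},
            },
        },
    },
    "@encode": {
        "arch": "spacy.MaxoutWindowEncoder.v1",
        "config": {"width": 96, "window_size": 1, "pieces": 3, "depth": 4},
    },
}
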
View File

@@ -262,8 +262,11 @@ def train(
exits=1,
)
train_docs = corpus.train_docs(
nlp, noise_level=noise_level, gold_preproc=gold_preproc, max_length=0,
ignore_misaligned=True
nlp,
noise_level=noise_level,
gold_preproc=gold_preproc,
max_length=0,
ignore_misaligned=True,
)
train_labels = set()
if textcat_multilabel:
@@ -344,7 +347,7 @@ def train(
orth_variant_level=orth_variant_level,
gold_preproc=gold_preproc,
max_length=0,
ignore_misaligned=True
ignore_misaligned=True,
)
if raw_text:
random.shuffle(raw_text)
@@ -383,8 +386,11 @@ def train(
if hasattr(component, "cfg"):
component.cfg["beam_width"] = beam_width
dev_docs = list(
corpus.dev_docs(nlp_loaded, gold_preproc=gold_preproc,
ignore_misaligned=True)
corpus.dev_docs(
nlp_loaded,
gold_preproc=gold_preproc,
ignore_misaligned=True,
)
)
nwords = sum(len(doc_gold[0]) for doc_gold in dev_docs)
start_time = timer()
@@ -401,8 +407,11 @@ def train(
if hasattr(component, "cfg"):
component.cfg["beam_width"] = beam_width
dev_docs = list(
corpus.dev_docs(nlp_loaded, gold_preproc=gold_preproc,
ignore_misaligned=True)
corpus.dev_docs(
nlp_loaded,
gold_preproc=gold_preproc,
ignore_misaligned=True,
)
)
start_time = timer()
scorer = nlp_loaded.evaluate(dev_docs, verbose=verbose)

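The train command reads its data through GoldCorpus. A minimal sketch of the calls being reformatted in this hunk, assuming the spaCy 2.x GoldCorpus API (the paths and keyword values are illustrative):

from spacy.gold import GoldCorpus
import spacy

nlp = spacy.blank("en")
corpus = GoldCorpus("train.json", "dev.json")  # hypothetical paths
# train_docs/dev_docs yield (doc, gold) pairs; with ignore_misaligned=True,
# examples whose tokens can't be aligned to the annotations are skipped
train_docs = corpus.train_docs(
    nlp,
    noise_level=0.0,
    gold_preproc=False,
    max_length=0,
    ignore_misaligned=True,
)
dev_docs = list(corpus.dev_docs(nlp, gold_preproc=False, ignore_misaligned=True))
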
View File

@@ -131,9 +131,7 @@ class Language(object):
Defaults = BaseDefaults
lang = None
factories = {
"tokenizer": lambda nlp: nlp.Defaults.create_tokenizer(nlp),
}
factories = {"tokenizer": lambda nlp: nlp.Defaults.create_tokenizer(nlp)}
def __init__(
self, vocab=True, make_doc=True, max_length=10 ** 6, meta={}, **kwargs

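The factories dict maps component names to callables that build pipeline components; nlp.create_pipe() looks names up here. A small sketch of extending it with a custom component, following the spaCy 2.x pattern (the component name and function below are made up for illustration):

from spacy.language import Language

def create_debug_component(nlp, **cfg):
    # A factory returns the component: any callable that takes and returns a Doc
    def debug_component(doc):
        print("tokens:", len(doc))
        return doc
    return debug_component

Language.factories["debug_component"] = create_debug_component

nlp = Language()
nlp.add_pipe(nlp.create_pipe("debug_component"))
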
View File

@@ -1 +1,5 @@
from .tok2vec import Tok2Vec
# coding: utf8
from __future__ import unicode_literals
from .tok2vec import Tok2Vec # noqa: F401
from .common import FeedForward, LayerNormalizedMaxout # noqa: F401

View File

@@ -13,6 +13,7 @@ def FeedForward(config):
model.cfg = config
return model
@register_architecture("spacy.LayerNormalizedMaxout.v1")
def LayerNormalizedMaxout(config):
width = config["width"]

View File

@@ -9,7 +9,6 @@ from thinc.misc import Residual, LayerNorm, FeatureExtracter
from ..util import make_layer, register_architecture
from ._wire import concatenate_lists
from .common import *
@register_architecture("spacy.Tok2Vec.v1")
@@ -81,8 +80,7 @@ def MaxoutWindowEncoder(config):
depth = config["depth"]
cnn = chain(
ExtractWindow(nW=nW),
LayerNorm(Maxout(nO, nO * ((nW * 2) + 1), pieces=nP)),
ExtractWindow(nW=nW), LayerNorm(Maxout(nO, nO * ((nW * 2) + 1), pieces=nP))
)
model = clone(Residual(cnn), depth)
model.nO = nO

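These layer functions are registered by name and later built from nested config dicts, as in the Tok2Vec config above. A rough sketch of that pattern, assuming register_architecture and make_layer (imported from spacy.util in this file) act as a simple name-to-constructor registry; the architecture name below is made up:

from thinc.v2v import Affine
from spacy.util import register_architecture, make_layer

@register_architecture("custom.Affine.v1")
def CustomAffine(config):
    # Build a plain thinc feed-forward layer from the "config" sub-dict
    return Affine(config["width"], config["width"])

layer = make_layer({"arch": "custom.Affine.v1", "config": {"width": 96}})
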
View File

@@ -2,7 +2,7 @@
from __future__ import unicode_literals
import srsly
from spacy.gold import GoldCorpus, json_to_tuple
from spacy.gold import GoldCorpus
from spacy.lang.en import English
from spacy.tests.util import make_tempdir
@@ -94,4 +94,3 @@ json_data = [
],
}
]

View File

@@ -205,4 +205,3 @@ def test_align(tokens_a, tokens_b, expected):
# check symmetry
cost, a2b, b2a, a2b_multi, b2a_multi = align(tokens_b, tokens_a)
assert (cost, list(b2a), list(a2b), b2a_multi, a2b_multi) == expected

View File

@@ -96,14 +96,14 @@ def test_PrecomputableAffine(nO=4, nI=5, nF=3, nP=2):
def test_prefer_gpu():
try:
import cupy
import cupy # noqa: F401
except ImportError:
assert not prefer_gpu()
def test_require_gpu():
try:
import cupy
import cupy # noqa: F401
except ImportError:
with pytest.raises(ValueError):
require_gpu()