mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 18:26:30 +03:00
Tidy up and auto-format
This commit is contained in:
parent
92018b9cd4
commit
c5e41247e8
42
spacy/_ml.py
42
spacy/_ml.py
|
@ -307,6 +307,7 @@ def PyTorchBiLSTM(nO, nI, depth, dropout=0.2):
|
||||||
import torch.nn
|
import torch.nn
|
||||||
from thinc.api import with_square_sequences
|
from thinc.api import with_square_sequences
|
||||||
from thinc.extra.wrappers import PyTorchWrapperRNN
|
from thinc.extra.wrappers import PyTorchWrapperRNN
|
||||||
|
|
||||||
if depth == 0:
|
if depth == 0:
|
||||||
return layerize(noop())
|
return layerize(noop())
|
||||||
model = torch.nn.LSTM(nI, nO // 2, depth, bidirectional=True, dropout=dropout)
|
model = torch.nn.LSTM(nI, nO // 2, depth, bidirectional=True, dropout=dropout)
|
||||||
|
@ -322,7 +323,7 @@ def Tok2Vec(width, embed_size, **kwargs):
|
||||||
bilstm_depth = kwargs.get("bilstm_depth", 0)
|
bilstm_depth = kwargs.get("bilstm_depth", 0)
|
||||||
|
|
||||||
cols = ["ID", "NORM", "PREFIX", "SUFFIX", "SHAPE", "ORTH"]
|
cols = ["ID", "NORM", "PREFIX", "SUFFIX", "SHAPE", "ORTH"]
|
||||||
|
|
||||||
doc2feats_cfg = {"arch": "spacy.Doc2Feats.v1", "config": {"columns": cols}}
|
doc2feats_cfg = {"arch": "spacy.Doc2Feats.v1", "config": {"columns": cols}}
|
||||||
if char_embed:
|
if char_embed:
|
||||||
embed_cfg = {
|
embed_cfg = {
|
||||||
|
@ -332,13 +333,10 @@ def Tok2Vec(width, embed_size, **kwargs):
|
||||||
"chars": 6,
|
"chars": 6,
|
||||||
"@mix": {
|
"@mix": {
|
||||||
"arch": "spacy.LayerNormalizedMaxout.v1",
|
"arch": "spacy.LayerNormalizedMaxout.v1",
|
||||||
"config": {
|
"config": {"width": width, "pieces": 3},
|
||||||
"width": width,
|
|
||||||
"pieces": 3
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
"@embed_features": None
|
"@embed_features": None,
|
||||||
}
|
},
|
||||||
}
|
}
|
||||||
else:
|
else:
|
||||||
embed_cfg = {
|
embed_cfg = {
|
||||||
|
@ -351,12 +349,9 @@ def Tok2Vec(width, embed_size, **kwargs):
|
||||||
"@pretrained_vectors": None,
|
"@pretrained_vectors": None,
|
||||||
"@mix": {
|
"@mix": {
|
||||||
"arch": "spacy.LayerNormalizedMaxout.v1",
|
"arch": "spacy.LayerNormalizedMaxout.v1",
|
||||||
"config": {
|
"config": {"width": width, "pieces": 3},
|
||||||
"width": width,
|
|
||||||
"pieces": 3
|
|
||||||
}
|
|
||||||
},
|
},
|
||||||
}
|
},
|
||||||
}
|
}
|
||||||
if pretrained_vectors:
|
if pretrained_vectors:
|
||||||
embed_cfg["config"]["@pretrained_vectors"] = {
|
embed_cfg["config"]["@pretrained_vectors"] = {
|
||||||
|
@ -364,8 +359,8 @@ def Tok2Vec(width, embed_size, **kwargs):
|
||||||
"config": {
|
"config": {
|
||||||
"vectors_name": pretrained_vectors,
|
"vectors_name": pretrained_vectors,
|
||||||
"width": width,
|
"width": width,
|
||||||
"column": cols.index(ID)
|
"column": cols.index(ID),
|
||||||
}
|
},
|
||||||
}
|
}
|
||||||
cnn_cfg = {
|
cnn_cfg = {
|
||||||
"arch": "spacy.MaxoutWindowEncoder.v1",
|
"arch": "spacy.MaxoutWindowEncoder.v1",
|
||||||
|
@ -373,35 +368,26 @@ def Tok2Vec(width, embed_size, **kwargs):
|
||||||
"width": width,
|
"width": width,
|
||||||
"window_size": 1,
|
"window_size": 1,
|
||||||
"pieces": cnn_maxout_pieces,
|
"pieces": cnn_maxout_pieces,
|
||||||
"depth": conv_depth
|
"depth": conv_depth,
|
||||||
}
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
bilstm_cfg = {
|
bilstm_cfg = {
|
||||||
"arch": "spacy.TorchBiLSTMEncoder.v1",
|
"arch": "spacy.TorchBiLSTMEncoder.v1",
|
||||||
"config": {
|
"config": {"width": width, "depth": bilstm_depth},
|
||||||
"width": width,
|
|
||||||
"depth": bilstm_depth,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
if conv_depth == 0 and bilstm_depth == 0:
|
if conv_depth == 0 and bilstm_depth == 0:
|
||||||
encode_cfg = {}
|
encode_cfg = {}
|
||||||
elif conv_depth >= 1 and bilstm_depth >= 1:
|
elif conv_depth >= 1 and bilstm_depth >= 1:
|
||||||
encode_cfg = {
|
encode_cfg = {
|
||||||
"arch": "thinc.FeedForward.v1",
|
"arch": "thinc.FeedForward.v1",
|
||||||
"config": {
|
"config": {"children": [cnn_cfg, bilstm_cfg]},
|
||||||
"children": [cnn_cfg, bilstm_cfg]
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
elif conv_depth >= 1:
|
elif conv_depth >= 1:
|
||||||
encode_cfg = cnn_cfg
|
encode_cfg = cnn_cfg
|
||||||
else:
|
else:
|
||||||
encode_cfg = bilstm_cfg
|
encode_cfg = bilstm_cfg
|
||||||
config = {
|
config = {"@doc2feats": doc2feats_cfg, "@embed": embed_cfg, "@encode": encode_cfg}
|
||||||
"@doc2feats": doc2feats_cfg,
|
|
||||||
"@embed": embed_cfg,
|
|
||||||
"@encode": encode_cfg
|
|
||||||
}
|
|
||||||
return new_ml.Tok2Vec(config)
|
return new_ml.Tok2Vec(config)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -262,8 +262,11 @@ def train(
|
||||||
exits=1,
|
exits=1,
|
||||||
)
|
)
|
||||||
train_docs = corpus.train_docs(
|
train_docs = corpus.train_docs(
|
||||||
nlp, noise_level=noise_level, gold_preproc=gold_preproc, max_length=0,
|
nlp,
|
||||||
ignore_misaligned=True
|
noise_level=noise_level,
|
||||||
|
gold_preproc=gold_preproc,
|
||||||
|
max_length=0,
|
||||||
|
ignore_misaligned=True,
|
||||||
)
|
)
|
||||||
train_labels = set()
|
train_labels = set()
|
||||||
if textcat_multilabel:
|
if textcat_multilabel:
|
||||||
|
@ -344,7 +347,7 @@ def train(
|
||||||
orth_variant_level=orth_variant_level,
|
orth_variant_level=orth_variant_level,
|
||||||
gold_preproc=gold_preproc,
|
gold_preproc=gold_preproc,
|
||||||
max_length=0,
|
max_length=0,
|
||||||
ignore_misaligned=True
|
ignore_misaligned=True,
|
||||||
)
|
)
|
||||||
if raw_text:
|
if raw_text:
|
||||||
random.shuffle(raw_text)
|
random.shuffle(raw_text)
|
||||||
|
@ -383,8 +386,11 @@ def train(
|
||||||
if hasattr(component, "cfg"):
|
if hasattr(component, "cfg"):
|
||||||
component.cfg["beam_width"] = beam_width
|
component.cfg["beam_width"] = beam_width
|
||||||
dev_docs = list(
|
dev_docs = list(
|
||||||
corpus.dev_docs(nlp_loaded, gold_preproc=gold_preproc,
|
corpus.dev_docs(
|
||||||
ignore_misaligned=True)
|
nlp_loaded,
|
||||||
|
gold_preproc=gold_preproc,
|
||||||
|
ignore_misaligned=True,
|
||||||
|
)
|
||||||
)
|
)
|
||||||
nwords = sum(len(doc_gold[0]) for doc_gold in dev_docs)
|
nwords = sum(len(doc_gold[0]) for doc_gold in dev_docs)
|
||||||
start_time = timer()
|
start_time = timer()
|
||||||
|
@ -401,8 +407,11 @@ def train(
|
||||||
if hasattr(component, "cfg"):
|
if hasattr(component, "cfg"):
|
||||||
component.cfg["beam_width"] = beam_width
|
component.cfg["beam_width"] = beam_width
|
||||||
dev_docs = list(
|
dev_docs = list(
|
||||||
corpus.dev_docs(nlp_loaded, gold_preproc=gold_preproc,
|
corpus.dev_docs(
|
||||||
ignore_misaligned=True)
|
nlp_loaded,
|
||||||
|
gold_preproc=gold_preproc,
|
||||||
|
ignore_misaligned=True,
|
||||||
|
)
|
||||||
)
|
)
|
||||||
start_time = timer()
|
start_time = timer()
|
||||||
scorer = nlp_loaded.evaluate(dev_docs, verbose=verbose)
|
scorer = nlp_loaded.evaluate(dev_docs, verbose=verbose)
|
||||||
|
|
|
@ -131,9 +131,7 @@ class Language(object):
|
||||||
Defaults = BaseDefaults
|
Defaults = BaseDefaults
|
||||||
lang = None
|
lang = None
|
||||||
|
|
||||||
factories = {
|
factories = {"tokenizer": lambda nlp: nlp.Defaults.create_tokenizer(nlp)}
|
||||||
"tokenizer": lambda nlp: nlp.Defaults.create_tokenizer(nlp),
|
|
||||||
}
|
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self, vocab=True, make_doc=True, max_length=10 ** 6, meta={}, **kwargs
|
self, vocab=True, make_doc=True, max_length=10 ** 6, meta={}, **kwargs
|
||||||
|
|
|
@ -1 +1,5 @@
|
||||||
from .tok2vec import Tok2Vec
|
# coding: utf8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from .tok2vec import Tok2Vec # noqa: F401
|
||||||
|
from .common import FeedForward, LayerNormalizedMaxout # noqa: F401
|
||||||
|
|
|
@ -13,6 +13,7 @@ def FeedForward(config):
|
||||||
model.cfg = config
|
model.cfg = config
|
||||||
return model
|
return model
|
||||||
|
|
||||||
|
|
||||||
@register_architecture("spacy.LayerNormalizedMaxout.v1")
|
@register_architecture("spacy.LayerNormalizedMaxout.v1")
|
||||||
def LayerNormalizedMaxout(config):
|
def LayerNormalizedMaxout(config):
|
||||||
width = config["width"]
|
width = config["width"]
|
||||||
|
|
|
@ -9,7 +9,6 @@ from thinc.misc import Residual, LayerNorm, FeatureExtracter
|
||||||
|
|
||||||
from ..util import make_layer, register_architecture
|
from ..util import make_layer, register_architecture
|
||||||
from ._wire import concatenate_lists
|
from ._wire import concatenate_lists
|
||||||
from .common import *
|
|
||||||
|
|
||||||
|
|
||||||
@register_architecture("spacy.Tok2Vec.v1")
|
@register_architecture("spacy.Tok2Vec.v1")
|
||||||
|
@ -81,8 +80,7 @@ def MaxoutWindowEncoder(config):
|
||||||
depth = config["depth"]
|
depth = config["depth"]
|
||||||
|
|
||||||
cnn = chain(
|
cnn = chain(
|
||||||
ExtractWindow(nW=nW),
|
ExtractWindow(nW=nW), LayerNorm(Maxout(nO, nO * ((nW * 2) + 1), pieces=nP))
|
||||||
LayerNorm(Maxout(nO, nO * ((nW * 2) + 1), pieces=nP)),
|
|
||||||
)
|
)
|
||||||
model = clone(Residual(cnn), depth)
|
model = clone(Residual(cnn), depth)
|
||||||
model.nO = nO
|
model.nO = nO
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
import srsly
|
import srsly
|
||||||
from spacy.gold import GoldCorpus, json_to_tuple
|
from spacy.gold import GoldCorpus
|
||||||
|
|
||||||
from spacy.lang.en import English
|
from spacy.lang.en import English
|
||||||
from spacy.tests.util import make_tempdir
|
from spacy.tests.util import make_tempdir
|
||||||
|
@ -94,4 +94,3 @@ json_data = [
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
|
@ -205,4 +205,3 @@ def test_align(tokens_a, tokens_b, expected):
|
||||||
# check symmetry
|
# check symmetry
|
||||||
cost, a2b, b2a, a2b_multi, b2a_multi = align(tokens_b, tokens_a)
|
cost, a2b, b2a, a2b_multi, b2a_multi = align(tokens_b, tokens_a)
|
||||||
assert (cost, list(b2a), list(a2b), b2a_multi, a2b_multi) == expected
|
assert (cost, list(b2a), list(a2b), b2a_multi, a2b_multi) == expected
|
||||||
|
|
||||||
|
|
|
@ -96,14 +96,14 @@ def test_PrecomputableAffine(nO=4, nI=5, nF=3, nP=2):
|
||||||
|
|
||||||
def test_prefer_gpu():
|
def test_prefer_gpu():
|
||||||
try:
|
try:
|
||||||
import cupy
|
import cupy # noqa: F401
|
||||||
except ImportError:
|
except ImportError:
|
||||||
assert not prefer_gpu()
|
assert not prefer_gpu()
|
||||||
|
|
||||||
|
|
||||||
def test_require_gpu():
|
def test_require_gpu():
|
||||||
try:
|
try:
|
||||||
import cupy
|
import cupy # noqa: F401
|
||||||
except ImportError:
|
except ImportError:
|
||||||
with pytest.raises(ValueError):
|
with pytest.raises(ValueError):
|
||||||
require_gpu()
|
require_gpu()
|
||||||
|
|
Loading…
Reference in New Issue
Block a user