Mirror of https://github.com/explosion/spaCy.git (synced 2024-12-25 17:36:30 +03:00)

Commit e33b7e0b3c: Tidy up parser and ML
Parent: e3265998c0

Changed file: spacy/_ml.py (295 lines)
@@ -1,47 +1,42 @@
import ujson
from thinc.v2v import Model, Maxout, Softmax, Affine, ReLu, SELU
# coding: utf8
from __future__ import unicode_literals

import numpy
from thinc.v2v import Model, Maxout, Softmax, Affine, ReLu
from thinc.i2v import HashEmbed, StaticVectors
from thinc.t2t import ExtractWindow, ParametricAttention
from thinc.t2v import Pooling, max_pool, mean_pool, sum_pool
from thinc.t2v import Pooling, sum_pool
from thinc.misc import Residual
from thinc.misc import BatchNorm as BN
from thinc.misc import LayerNorm as LN

from thinc.api import add, layerize, chain, clone, concatenate, with_flatten
from thinc.api import FeatureExtracter, with_getitem
from thinc.api import uniqued, wrap, flatten_add_lengths, noop
from thinc.api import FeatureExtracter, with_getitem, flatten_add_lengths
from thinc.api import uniqued, wrap, noop

from thinc.linear.linear import LinearModel
from thinc.neural.ops import NumpyOps, CupyOps
from thinc.neural.util import get_array_module

import random
import cytoolz

from thinc import describe
from thinc.describe import Dimension, Synapses, Biases, Gradient
from thinc.neural._classes.affine import _set_dimensions_if_needed
import thinc.extra.load_nlp

from .attrs import ID, ORTH, LOWER, NORM, PREFIX, SUFFIX, SHAPE, TAG, DEP, CLUSTER
from .tokens.doc import Doc
from .attrs import ID, ORTH, LOWER, NORM, PREFIX, SUFFIX, SHAPE
from . import util

import numpy
import io

# TODO: Unset this once we don't want to support models previous models.
import thinc.neural._classes.layernorm
thinc.neural._classes.layernorm.set_compat_six_eight(False)

VECTORS_KEY = 'spacy_pretrained_vectors'


@layerize
def _flatten_add_lengths(seqs, pad=0, drop=0.):
    ops = Model.ops
    lengths = ops.asarray([len(seq) for seq in seqs], dtype='i')

    def finish_update(d_X, sgd=None):
        return ops.unflatten(d_X, lengths, pad=pad)

    X = ops.flatten(seqs, pad=pad)
    return (X, lengths), finish_update


@@ -55,33 +50,14 @@ def _logistic(X, drop=0.):
    X = xp.minimum(X, 10., X)
    X = xp.maximum(X, -10., X)
    Y = 1. / (1. + xp.exp(-X))

    def logistic_bwd(dY, sgd=None):
        dX = dY * (Y * (1-Y))
        return dX

    return Y, logistic_bwd


@layerize
def add_tuples(X, drop=0.):
    """Give inputs of sequence pairs, where each sequence is (vals, length),
    sum the values, returning a single sequence.

    If input is:
    ((vals1, length), (vals2, length)
    Output is:
    (vals1+vals2, length)

    vals are a single tensor for the whole batch.
    """
    (vals1, length1), (vals2, length2) = X
    assert length1 == length2

    def add_tuples_bwd(dY, sgd=None):
        return (dY, dY)

    return (vals1+vals2, length), add_tuples_bwd


def _zero_init(model):
    def _zero_init_impl(self, X, y):
        self.W.fill(0)
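A minimal NumPy sketch of the add_tuples contract described in the docstring above (toy arrays, not the thinc layer itself): both inputs must share the same lengths, the flattened value tensors are summed in the forward pass, and the incoming gradient is copied to both inputs in the backward pass.

import numpy

# Illustration only: two sequences of 2 and 3 rows, flattened into one batch tensor.
lengths = numpy.asarray([2, 3], dtype='i')
vals1 = numpy.arange(5 * 4, dtype='f').reshape(5, 4)
vals2 = numpy.ones((5, 4), dtype='f')

summed = vals1 + vals2                               # forward: (vals1+vals2, lengths)
d_summed = numpy.random.uniform(size=summed.shape)   # upstream gradient
d_vals1, d_vals2 = d_summed, d_summed                # backward: same gradient flows to both inputs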
@@ -115,13 +91,12 @@ def _init_for_precomputed(W, ops):
    nF=Dimension("Number of features"),
    nO=Dimension("Output size"),
    W=Synapses("Weights matrix",
        lambda obj: (obj.nF, obj.nO, obj.nI),
        lambda W, ops: _init_for_precomputed(W, ops)),
        lambda obj: (obj.nF, obj.nO, obj.nI),
        lambda W, ops: _init_for_precomputed(W, ops)),
    b=Biases("Bias vector",
        lambda obj: (obj.nO,)),
        lambda obj: (obj.nO,)),
    d_W=Gradient("W"),
    d_b=Gradient("b")
)
    d_b=Gradient("b"))
class PrecomputableAffine(Model):
    def __init__(self, nO=None, nI=None, nF=None, **kwargs):
        Model.__init__(self, **kwargs)

@@ -134,18 +109,19 @@ class PrecomputableAffine(Model):
        # Yf: (b, f, i)
        # dY: (b, o)
        # dYf: (b, f, o)
        #Yf = numpy.einsum('bi,foi->bfo', X, self.W)
        # Yf = numpy.einsum('bi,foi->bfo', X, self.W)
        Yf = self.ops.xp.tensordot(
            X, self.W, axes=[[1], [2]])
        Yf += self.b

        def backward(dY_ids, sgd=None):
            tensordot = self.ops.xp.tensordot
            dY, ids = dY_ids
            Xf = X[ids]

            #dXf = numpy.einsum('bo,foi->bfi', dY, self.W)
            # dXf = numpy.einsum('bo,foi->bfi', dY, self.W)
            dXf = tensordot(dY, self.W, axes=[[1], [1]])
            #dW = numpy.einsum('bo,bfi->ofi', dY, Xf)
            # dW = numpy.einsum('bo,bfi->ofi', dY, Xf)
            dW = tensordot(dY, Xf, axes=[[0], [0]])
            # ofi -> foi
            self.d_W += dW.transpose((1, 0, 2))

@@ -154,6 +130,7 @@ class PrecomputableAffine(Model):
            if sgd is not None:
                sgd(self._mem.weights, self._mem.gradient, key=self.id)
            return dXf

        return Yf, backward
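The commented-out einsum calls above document what the tensordot calls in PrecomputableAffine compute. A small NumPy check of that equivalence, with arbitrary toy sizes (b=batch, f=features, o=outputs, i=inputs; the sizes are assumptions for illustration):

import numpy

b, f, o, i = 4, 3, 5, 6
X = numpy.random.uniform(size=(b, i))
W = numpy.random.uniform(size=(f, o, i))
dY = numpy.random.uniform(size=(b, o))
Xf = numpy.random.uniform(size=(b, f, i))

Yf = numpy.tensordot(X, W, axes=[[1], [2]])          # (b, f, o)
assert numpy.allclose(Yf, numpy.einsum('bi,foi->bfo', X, W))

dXf = numpy.tensordot(dY, W, axes=[[1], [1]])        # (b, f, i)
assert numpy.allclose(dXf, numpy.einsum('bo,foi->bfi', dY, W))

dW = numpy.tensordot(dY, Xf, axes=[[0], [0]])        # (o, f, i); transposed to (f, o, i)
assert numpy.allclose(dW.transpose((1, 0, 2)), numpy.einsum('bo,bfi->foi', dY, Xf))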
@@ -164,13 +141,12 @@ class PrecomputableAffine(Model):
    nP=Dimension("Number of pieces"),
    nO=Dimension("Output size"),
    W=Synapses("Weights matrix",
        lambda obj: (obj.nF, obj.nO, obj.nP, obj.nI),
        lambda W, ops: ops.xavier_uniform_init(W)),
        lambda obj: (obj.nF, obj.nO, obj.nP, obj.nI),
        lambda W, ops: ops.xavier_uniform_init(W)),
    b=Biases("Bias vector",
        lambda obj: (obj.nO, obj.nP)),
        lambda obj: (obj.nO, obj.nP)),
    d_W=Gradient("W"),
    d_b=Gradient("b")
)
    d_b=Gradient("b"))
class PrecomputableMaxouts(Model):
    def __init__(self, nO=None, nI=None, nF=None, nP=3, **kwargs):
        Model.__init__(self, **kwargs)

@@ -186,114 +162,26 @@ class PrecomputableMaxouts(Model):
        # dYp: (b, o, p)
        # W: (f, o, p, i)
        # b: (o, p)

        # bi,opfi->bfop
        # bop,fopi->bfi
        # bop,fbi->opfi : fopi

        tensordot = self.ops.xp.tensordot
        ascontiguous = self.ops.xp.ascontiguousarray

        Yfp = tensordot(X, self.W, axes=[[1], [3]])
        Yfp += self.b

        def backward(dYp_ids, sgd=None):
            dYp, ids = dYp_ids
            Xf = X[ids]

            dXf = tensordot(dYp, self.W, axes=[[1, 2], [1,2]])
            dXf = tensordot(dYp, self.W, axes=[[1, 2], [1, 2]])
            dW = tensordot(dYp, Xf, axes=[[0], [0]])

            self.d_W += dW.transpose((2, 0, 1, 3))
            self.d_b += dYp.sum(axis=0)

            if sgd is not None:
                sgd(self._mem.weights, self._mem.gradient, key=self.id)
            return dXf

        return Yfp, backward
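The shape comments at the top of PrecomputableMaxouts.begin_update can be checked the same way. A NumPy sketch with arbitrary toy sizes, following the W layout (f, o, p, i) stated in the comments (sizes are assumptions for illustration):

import numpy

b, f, o, p, i = 4, 3, 5, 2, 6
X = numpy.random.uniform(size=(b, i))
W = numpy.random.uniform(size=(f, o, p, i))
dYp = numpy.random.uniform(size=(b, o, p))
Xf = numpy.random.uniform(size=(b, f, i))

Yfp = numpy.tensordot(X, W, axes=[[1], [3]])              # (b, f, o, p)
assert Yfp.shape == (b, f, o, p)

dXf = numpy.tensordot(dYp, W, axes=[[1, 2], [1, 2]])      # (b, f, i)
assert dXf.shape == (b, f, i)

dW = numpy.tensordot(dYp, Xf, axes=[[0], [0]])            # (o, p, f, i)
assert dW.transpose((2, 0, 1, 3)).shape == (f, o, p, i)   # matches the layout of self.W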
# Thinc's Embed class is a bit broken atm, so drop this here.
from thinc import describe
from thinc.neural._classes.embed import _uniform_init


@describe.attributes(
    nV=describe.Dimension("Number of vectors"),
    nO=describe.Dimension("Size of output"),
    vectors=describe.Weights("Embedding table",
        lambda obj: (obj.nV, obj.nO),
        _uniform_init(-0.1, 0.1)
    ),
    d_vectors=describe.Gradient("vectors")
)
class Embed(Model):
    name = 'embed'

    def __init__(self, nO, nV=None, **kwargs):
        if nV is not None:
            nV += 1
        Model.__init__(self, **kwargs)
        if 'name' in kwargs:
            self.name = kwargs['name']
        self.column = kwargs.get('column', 0)
        self.nO = nO
        self.nV = nV

    def predict(self, ids):
        if ids.ndim == 2:
            ids = ids[:, self.column]
        return self.ops.xp.ascontiguousarray(self.vectors[ids], dtype='f')

    def begin_update(self, ids, drop=0.):
        if ids.ndim == 2:
            ids = ids[:, self.column]
        vectors = self.ops.xp.ascontiguousarray(self.vectors[ids], dtype='f')
        def backprop_embed(d_vectors, sgd=None):
            n_vectors = d_vectors.shape[0]
            self.ops.scatter_add(self.d_vectors, ids, d_vectors)
            if sgd is not None:
                sgd(self._mem.weights, self._mem.gradient, key=self.id)
            return None
        return vectors, backprop_embed


def HistoryFeatures(nr_class, hist_size=8, nr_dim=8):
    '''Wrap a model, adding features representing action history.'''
    if hist_size == 0:
        return layerize(noop())
    embed_tables = [Embed(nr_dim, nr_class, column=i, name='embed%d')
                    for i in range(hist_size)]
    embed = chain(concatenate(*embed_tables),
                  LN(Maxout(hist_size*nr_dim, hist_size*nr_dim)))
    ops = embed.ops
    def add_history_fwd(vectors_hists, drop=0.):
        vectors, hist_ids = vectors_hists
        hist_feats, bp_hists = embed.begin_update(hist_ids, drop=drop)
        outputs = ops.xp.hstack((vectors, hist_feats))

        def add_history_bwd(d_outputs, sgd=None):
            d_vectors = d_outputs[:, :vectors.shape[1]]
            d_hists = d_outputs[:, vectors.shape[1]:]
            bp_hists(d_hists, sgd=sgd)
            return embed.ops.xp.ascontiguousarray(d_vectors)
        return outputs, add_history_bwd
    return wrap(add_history_fwd, embed)


def drop_layer(layer, factor=2.):
    def drop_layer_fwd(X, drop=0.):
        if drop <= 0.:
            return layer.begin_update(X, drop=drop)
        else:
            coinflip = layer.ops.xp.random.random()
            if (coinflip / factor) >= drop:
                return layer.begin_update(X, drop=drop)
            else:
                return X, lambda dX, sgd=None: dX

    model = wrap(drop_layer_fwd, layer)
    model.predict = layer
    return model

def link_vectors_to_models(vocab):
    vectors = vocab.vectors
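drop_layer above implements a stochastic skip: on each call a uniform draw, scaled by factor, decides whether the wrapped layer runs or the input passes through untouched. A standalone sketch of just that decision logic (illustrative, not the thinc wrapper; the helper name and the example rate are made up):

import numpy

def should_run_layer(drop, factor=2., rng=numpy.random):
    # Mirrors the coin-flip in drop_layer_fwd: always run when drop <= 0,
    # otherwise run only if the scaled draw clears the dropout rate.
    if drop <= 0.:
        return True
    coinflip = rng.random()
    return (coinflip / factor) >= drop

runs = sum(should_run_layer(0.2) for _ in range(10000))
print('layer executed in ~%.0f%% of calls' % (100.0 * runs / 10000))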
@@ -308,16 +196,21 @@ def link_vectors_to_models(vocab):
    # (unideal, I know)
    thinc.extra.load_nlp.VECTORS[(ops.device, VECTORS_KEY)] = data


def Tok2Vec(width, embed_size, **kwargs):
    pretrained_dims = kwargs.get('pretrained_dims', 0)
    cnn_maxout_pieces = kwargs.get('cnn_maxout_pieces', 2)
    cols = [ID, NORM, PREFIX, SUFFIX, SHAPE, ORTH]
    with Model.define_operators({'>>': chain, '|': concatenate, '**': clone, '+': add,
                                 '*': reapply}):
        norm = HashEmbed(width, embed_size, column=cols.index(NORM), name='embed_norm')
        prefix = HashEmbed(width, embed_size//2, column=cols.index(PREFIX), name='embed_prefix')
        suffix = HashEmbed(width, embed_size//2, column=cols.index(SUFFIX), name='embed_suffix')
        shape = HashEmbed(width, embed_size//2, column=cols.index(SHAPE), name='embed_shape')
    with Model.define_operators({'>>': chain, '|': concatenate, '**': clone,
                                 '+': add, '*': reapply}):
        norm = HashEmbed(width, embed_size, column=cols.index(NORM),
                         name='embed_norm')
        prefix = HashEmbed(width, embed_size//2, column=cols.index(PREFIX),
                           name='embed_prefix')
        suffix = HashEmbed(width, embed_size//2, column=cols.index(SUFFIX),
                           name='embed_suffix')
        shape = HashEmbed(width, embed_size//2, column=cols.index(SHAPE),
                          name='embed_shape')
        if pretrained_dims is not None and pretrained_dims >= 1:
            glove = StaticVectors(VECTORS_KEY, width, column=cols.index(ID))

@@ -329,7 +222,6 @@ def Tok2Vec(width, embed_size, **kwargs):
            (norm | prefix | suffix | shape)
            >> LN(Maxout(width, width*4, pieces=3)), column=5)

        convolution = Residual(
            ExtractWindow(nW=1)
            >> LN(Maxout(width, width*3, pieces=cnn_maxout_pieces))

@@ -354,6 +246,7 @@ def reapply(layer, n_times):
            Y, backprop = layer.begin_update(X, drop=drop)
            X = Y
            backprops.append(backprop)

        def reapply_bwd(dY, sgd=None):
            dX = None
            for backprop in reversed(backprops):

@@ -363,39 +256,20 @@ def reapply(layer, n_times):
                else:
                    dX += dY
            return dX

        return Y, reapply_bwd
    return wrap(reapply_fwd, layer)


def asarray(ops, dtype):
    def forward(X, drop=0.):
        return ops.asarray(X, dtype=dtype), None
    return layerize(forward)


def foreach(layer):
    def forward(Xs, drop=0.):
        results = []
        backprops = []
        for X in Xs:
            result, bp = layer.begin_update(X, drop=drop)
            results.append(result)
            backprops.append(bp)
        def backward(d_results, sgd=None):
            dXs = []
            for d_result, backprop in zip(d_results, backprops):
                dXs.append(backprop(d_result, sgd))
            return dXs
        return results, backward
    model = layerize(forward)
    model._layers.append(layer)
    return model


def rebatch(size, layer):
    ops = layer.ops

    def forward(X, drop=0.):
        if X.shape[0] < size:
            return layer.begin_update(X)

@@ -403,6 +277,7 @@ def rebatch(size, layer):
        results, bp_results = zip(*[layer.begin_update(p, drop=drop)
                                    for p in parts])
        y = ops.flatten(results)

        def backward(dy, sgd=None):
            d_parts = [bp(y, sgd=sgd) for bp, y in
                       zip(bp_results, _divide_array(dy, size))]

@@ -413,6 +288,7 @@ def rebatch(size, layer):
            except ValueError:
                dX = None
            return dX

        return y, backward
    model = layerize(forward)
    model._layers.append(layer)

@@ -423,13 +299,14 @@ def _divide_array(X, size):
    parts = []
    index = 0
    while index < len(X):
        parts.append(X[index : index + size])
        parts.append(X[index:index + size])
        index += size
    return parts


def get_col(idx):
    assert idx >= 0, idx

    def forward(X, drop=0.):
        assert idx >= 0, idx
        if isinstance(X, numpy.ndarray):

@@ -437,30 +314,28 @@ def get_col(idx):
        else:
            ops = CupyOps()
        output = ops.xp.ascontiguousarray(X[:, idx], dtype=X.dtype)

        def backward(y, sgd=None):
            assert idx >= 0, idx
            dX = ops.allocate(X.shape)
            dX[:, idx] += y
            return dX

        return output, backward

    return layerize(forward)


def zero_init(model):
    def _hook(self, X, y=None):
        self.W.fill(0)
    model.on_data_hooks.append(_hook)
    return model


def doc2feats(cols=None):
    if cols is None:
        cols = [ID, NORM, PREFIX, SUFFIX, SHAPE, ORTH]

    def forward(docs, drop=0.):
        feats = []
        for doc in docs:
            feats.append(doc.to_array(cols))
        return feats, None

    model = layerize(forward)
    model.cols = cols
    return model

@@ -474,28 +349,14 @@ def print_shape(prefix):

@layerize
def get_token_vectors(tokens_attrs_vectors, drop=0.):
    ops = Model.ops
    tokens, attrs, vectors = tokens_attrs_vectors

    def backward(d_output, sgd=None):
        return (tokens, d_output)

    return vectors, backward


@layerize
def flatten(seqs, drop=0.):
    if isinstance(seqs[0], numpy.ndarray):
        ops = NumpyOps()
    elif hasattr(CupyOps.xp, 'ndarray') and isinstance(seqs[0], CupyOps.xp.ndarray):
        ops = CupyOps()
    else:
        raise ValueError("Unable to flatten sequence of type %s" % type(seqs[0]))
    lengths = [len(seq) for seq in seqs]
    def finish_update(d_X, sgd=None):
        return ops.unflatten(d_X, lengths)
    X = ops.xp.vstack(seqs)
    return X, finish_update


@layerize
def logistic(X, drop=0.):
    xp = get_array_module(X)

@@ -505,9 +366,11 @@ def logistic(X, drop=0.):
    X = xp.minimum(X, 10., X)
    X = xp.maximum(X, -10., X)
    Y = 1. / (1. + xp.exp(-X))

    def logistic_bwd(dY, sgd=None):
        dX = dY * (Y * (1-Y))
        return dX

    return Y, logistic_bwd
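logistic clips its input to [-10, 10] and backpropagates dY * Y * (1 - Y), the usual sigmoid derivative. A quick finite-difference check of that gradient (illustrative, plain NumPy):

import numpy

X = numpy.linspace(-3., 3., 7)
Y = 1. / (1. + numpy.exp(-X))
analytic = Y * (1. - Y)

eps = 1e-6
numeric = ((1. / (1. + numpy.exp(-(X + eps)))) -
           (1. / (1. + numpy.exp(-(X - eps))))) / (2 * eps)
assert numpy.allclose(analytic, numeric, atol=1e-6)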
@@ -517,6 +380,7 @@ def zero_init(model):
    model.on_data_hooks.append(_zero_init_impl)
    return model


@layerize
def preprocess_doc(docs, drop=0.):
    keys = [doc.to_array([LOWER]) for doc in docs]

@@ -526,11 +390,13 @@ def preprocess_doc(docs, drop=0.):
    vals = ops.allocate(keys.shape[0]) + 1
    return (keys, vals, lengths), None


def getitem(i):
    def getitem_fwd(X, drop=0.):
        return X[i], None
    return layerize(getitem_fwd)


def build_tagger_model(nr_class, **cfg):
    embed_size = util.env_opt('embed_size', 7000)
    if 'token_vector_width' in cfg:

@@ -555,8 +421,6 @@ def build_tagger_model(nr_class, **cfg):

@layerize
def SpacyVectors(docs, drop=0.):
    xp = get_array_module(docs[0].vocab.vectors.data)
    width = docs[0].vocab.vectors.data.shape[1]
    batch = []
    for doc in docs:
        indices = numpy.zeros((len(doc),), dtype='i')

@@ -570,29 +434,6 @@ def SpacyVectors(docs, drop=0.):
    return batch, None


def foreach(layer, drop_factor=1.0):
    '''Map a layer across elements in a list'''
    def foreach_fwd(Xs, drop=0.):
        drop *= drop_factor
        ys = []
        backprops = []
        for X in Xs:
            y, bp_y = layer.begin_update(X, drop=drop)
            ys.append(y)
            backprops.append(bp_y)
        def foreach_bwd(d_ys, sgd=None):
            d_Xs = []
            for d_y, bp_y in zip(d_ys, backprops):
                if bp_y is not None and bp_y is not None:
                    d_Xs.append(d_y, sgd=sgd)
                else:
                    d_Xs.append(None)
            return d_Xs
        return ys, foreach_bwd
    model = wrap(foreach_fwd, layer)
    return model


def build_text_classifier(nr_class, width=64, **cfg):
    nr_vector = cfg.get('nr_vector', 5000)
    pretrained_dims = cfg.get('pretrained_dims', 0)

@@ -602,9 +443,7 @@ def build_text_classifier(nr_class, width=64, **cfg):
        model = (
            SpacyVectors
            >> flatten_add_lengths
            >> with_getitem(0,
                Affine(width, pretrained_dims)
            )
            >> with_getitem(0, Affine(width, pretrained_dims))
            >> ParametricAttention(width)
            >> Pooling(sum_pool)
            >> Residual(ReLu(width, width)) ** 2

@@ -613,7 +452,6 @@ def build_text_classifier(nr_class, width=64, **cfg):
        )
        return model


    lower = HashEmbed(width, nr_vector, column=1)
    prefix = HashEmbed(width//2, nr_vector, column=2)
    suffix = HashEmbed(width//2, nr_vector, column=3)

@@ -671,33 +509,40 @@ def build_text_classifier(nr_class, width=64, **cfg):
    model.lsuv = False
    return model


@layerize
def flatten(seqs, drop=0.):
    ops = Model.ops
    lengths = ops.asarray([len(seq) for seq in seqs], dtype='i')

    def finish_update(d_X, sgd=None):
        return ops.unflatten(d_X, lengths, pad=0)

    X = ops.flatten(seqs, pad=0)
    return X, finish_update


def concatenate_lists(*layers, **kwargs): # pragma: no cover
    '''Compose two or more models `f`, `g`, etc, such that their outputs are
def concatenate_lists(*layers, **kwargs): # pragma: no cover
    """Compose two or more models `f`, `g`, etc, such that their outputs are
    concatenated, i.e. `concatenate(f, g)(x)` computes `hstack(f(x), g(x))`
    '''
    """
    if not layers:
        return noop()
    drop_factor = kwargs.get('drop_factor', 1.0)
    ops = layers[0].ops
    layers = [chain(layer, flatten) for layer in layers]
    concat = concatenate(*layers)

    def concatenate_lists_fwd(Xs, drop=0.):
        drop *= drop_factor
        lengths = ops.asarray([len(X) for X in Xs], dtype='i')
        flat_y, bp_flat_y = concat.begin_update(Xs, drop=drop)
        ys = ops.unflatten(flat_y, lengths)

        def concatenate_lists_bwd(d_ys, sgd=None):
            return bp_flat_y(ops.flatten(d_ys), sgd=sgd)

        return ys, concatenate_lists_bwd

    model = wrap(concatenate_lists_fwd, concat)
    return model
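concatenate_lists promises that concatenate(f, g)(x) computes hstack(f(x), g(x)) for each sequence. A toy illustration of that contract using plain functions as stand-ins for thinc models (the functions, widths, and inputs are made up for the example):

import numpy

def f(ids):
    return numpy.ones((len(ids), 2), dtype='f')       # width-2 features per token

def g(ids):
    return numpy.zeros((len(ids), 3), dtype='f')      # width-3 features per token

docs = [[1, 2, 3], [4, 5]]
outputs = [numpy.hstack((f(ids), g(ids))) for ids in docs]
assert [out.shape for out in outputs] == [(3, 5), (2, 5)]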
(second changed file: the parser's Cython module)

@@ -49,9 +49,8 @@ from .. import util
from ..util import get_async, get_cuda_stream
from .._ml import zero_init, PrecomputableAffine, PrecomputableMaxouts
from .._ml import Tok2Vec, doc2feats, rebatch
from .._ml import Residual, drop_layer, flatten
from .._ml import Residual, flatten
from .._ml import link_vectors_to_models
from .._ml import HistoryFeatures
from ..compat import json_dumps, copy_array

from .stateclass cimport StateClass

@@ -77,7 +76,7 @@ def set_debug(val):

cdef class precompute_hiddens:
    '''Allow a model to be "primed" by pre-computing input features in bulk.
    """Allow a model to be "primed" by pre-computing input features in bulk.

    This is used for the parser, where we want to take a batch of documents,
    and compute vectors for each (token, position) pair. These vectors can then

@@ -92,7 +91,7 @@ cdef class precompute_hiddens:
    so we can save the factor k. This also gives a nice CPU/GPU division:
    we can do all our hard maths up front, packed into large multiplications,
    and do the hard-to-program parsing on the CPU.
    '''
    """
    cdef int nF, nO, nP
    cdef bint _is_synchronized
    cdef public object ops
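The docstring above describes the precomputation trick: do the expensive matrix work once per batch of tokens, then let each parser state gather and sum a handful of precomputed rows. A hedged NumPy sketch of that idea (the weight layout, sizes, and variable names here are assumptions for illustration, not the actual Cython implementation):

import numpy

n_tokens, n_in, n_hidden, nF = 50, 8, 16, 3
token_vectors = numpy.random.uniform(size=(n_tokens, n_in))
W = numpy.random.uniform(size=(nF, n_in, n_hidden))

# One-off bulk work, GPU-friendly: a hidden contribution per (token, feature position).
precomputed = numpy.einsum('ti,fih->tfh', token_vectors, W)   # (n_tokens, nF, n_hidden)

# Cheap per-state work, CPU-friendly: pick the token filling each feature slot and sum.
state_token_ids = numpy.array([4, 17, 23])
hidden = precomputed[state_token_ids, numpy.arange(nF)].sum(axis=0)
assert hidden.shape == (n_hidden,)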
@@ -280,23 +279,19 @@ cdef class Parser:
        return (tok2vec, lower, upper), cfg

    def __init__(self, Vocab vocab, moves=True, model=True, **cfg):
        """
        Create a Parser.
        """Create a Parser.

        Arguments:
            vocab (Vocab):
                The vocabulary object. Must be shared with documents to be processed.
                The value is set to the .vocab attribute.
            moves (TransitionSystem):
                Defines how the parse-state is created, updated and evaluated.
                The value is set to the .moves attribute unless True (default),
                in which case a new instance is created with Parser.Moves().
            model (object):
                Defines how the parse-state is created, updated and evaluated.
                The value is set to the .model attribute unless True (default),
                in which case a new instance is created with Parser.Model().
            **cfg:
                Arbitrary configuration parameters. Set to the .cfg attribute
        vocab (Vocab): The vocabulary object. Must be shared with documents
            to be processed. The value is set to the `.vocab` attribute.
        moves (TransitionSystem): Defines how the parse-state is created,
            updated and evaluated. The value is set to the .moves attribute
            unless True (default), in which case a new instance is created with
            `Parser.Moves()`.
        model (object): Defines how the parse-state is created, updated and
            evaluated. The value is set to the .model attribute unless True
            (default), in which case a new instance is created with
            `Parser.Model()`.
        **cfg: Arbitrary configuration parameters. Set to the `.cfg` attribute
        """
        self.vocab = vocab
        if moves is True:

@@ -322,13 +317,10 @@ cdef class Parser:
        return (Parser, (self.vocab, self.moves, self.model), None, None)

    def __call__(self, Doc doc, beam_width=None, beam_density=None):
        """
        Apply the parser or entity recognizer, setting the annotations onto the Doc object.
        """Apply the parser or entity recognizer, setting the annotations onto
        the `Doc` object.

        Arguments:
            doc (Doc): The document to be processed.
        Returns:
            None
        doc (Doc): The document to be processed.
        """
        if beam_width is None:
            beam_width = self.cfg.get('beam_width', 1)

@@ -350,16 +342,13 @@ cdef class Parser:

    def pipe(self, docs, int batch_size=256, int n_threads=2,
             beam_width=None, beam_density=None):
        """
        Process a stream of documents.
        """Process a stream of documents.

        Arguments:
            stream: The sequence of documents to process.
            batch_size (int):
                The number of documents to accumulate into a working set.
            n_threads (int):
                The number of threads with which to work on the buffer in parallel.
        Yields (Doc): Documents, in order.
        stream: The sequence of documents to process.
        batch_size (int): Number of documents to accumulate into a working set.
        n_threads (int): The number of threads with which to work on the buffer
            in parallel.
        YIELDS (Doc): Documents, in order.
        """
        if beam_width is None:
            beam_width = self.cfg.get('beam_width', 1)
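Putting the documented signatures together, a hedged usage sketch of the Parser API as described by these docstrings (assumes `vocab` is a shared Vocab, `doc` is a single Doc, and `docs` is an iterable of Doc objects; the diff does not show an import path, so none is given here):

# Sketch only, based on the docstrings above.
parser = Parser(vocab)            # moves=True, model=True -> defaults are created by the class
parser(doc)                       # annotate a single Doc in place
for parsed in parser.pipe(docs, batch_size=256, n_threads=2):
    pass                          # documents are yielded in order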