Tidy up parser and ML

ines 2017-10-27 14:39:30 +02:00
parent e3265998c0
commit e33b7e0b3c
2 changed files with 94 additions and 260 deletions

View File

@@ -1,47 +1,42 @@
-import ujson
-from thinc.v2v import Model, Maxout, Softmax, Affine, ReLu, SELU
+# coding: utf8
+from __future__ import unicode_literals
+import numpy
+from thinc.v2v import Model, Maxout, Softmax, Affine, ReLu
 from thinc.i2v import HashEmbed, StaticVectors
 from thinc.t2t import ExtractWindow, ParametricAttention
-from thinc.t2v import Pooling, max_pool, mean_pool, sum_pool
+from thinc.t2v import Pooling, sum_pool
 from thinc.misc import Residual
-from thinc.misc import BatchNorm as BN
 from thinc.misc import LayerNorm as LN
 from thinc.api import add, layerize, chain, clone, concatenate, with_flatten
-from thinc.api import FeatureExtracter, with_getitem
-from thinc.api import uniqued, wrap, flatten_add_lengths, noop
+from thinc.api import FeatureExtracter, with_getitem, flatten_add_lengths
+from thinc.api import uniqued, wrap, noop
 from thinc.linear.linear import LinearModel
 from thinc.neural.ops import NumpyOps, CupyOps
 from thinc.neural.util import get_array_module
-import random
-import cytoolz
 from thinc import describe
 from thinc.describe import Dimension, Synapses, Biases, Gradient
 from thinc.neural._classes.affine import _set_dimensions_if_needed
 import thinc.extra.load_nlp
-from .attrs import ID, ORTH, LOWER, NORM, PREFIX, SUFFIX, SHAPE, TAG, DEP, CLUSTER
-from .tokens.doc import Doc
+from .attrs import ID, ORTH, LOWER, NORM, PREFIX, SUFFIX, SHAPE
 from . import util
-import numpy
-import io
-# TODO: Unset this once we don't want to support models previous models.
-import thinc.neural._classes.layernorm
-thinc.neural._classes.layernorm.set_compat_six_eight(False)
 VECTORS_KEY = 'spacy_pretrained_vectors'
 @layerize
 def _flatten_add_lengths(seqs, pad=0, drop=0.):
     ops = Model.ops
     lengths = ops.asarray([len(seq) for seq in seqs], dtype='i')
     def finish_update(d_X, sgd=None):
         return ops.unflatten(d_X, lengths, pad=pad)
     X = ops.flatten(seqs, pad=pad)
     return (X, lengths), finish_update
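
A minimal NumPy sketch of what `_flatten_add_lengths` computes (illustrative only; the
real layer uses thinc's `Model.ops`, so it also handles GPU arrays and padding):

    import numpy

    def flatten_add_lengths(seqs):
        # Concatenate per-document arrays into one (N, width) array,
        # remembering how long each document was.
        lengths = numpy.asarray([len(seq) for seq in seqs], dtype='i')
        X = numpy.concatenate(seqs, axis=0)

        def finish_update(d_X):
            # The backward pass splits the flat gradient back per sequence.
            return numpy.split(d_X, numpy.cumsum(lengths)[:-1], axis=0)

        return (X, lengths), finish_update

    seqs = [numpy.ones((3, 4)), numpy.ones((5, 4))]
    (X, lengths), bp = flatten_add_lengths(seqs)
    assert X.shape == (8, 4) and list(lengths) == [3, 5]
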
@@ -55,33 +50,14 @@ def _logistic(X, drop=0.):
     X = xp.minimum(X, 10., X)
     X = xp.maximum(X, -10., X)
     Y = 1. / (1. + xp.exp(-X))
     def logistic_bwd(dY, sgd=None):
         dX = dY * (Y * (1-Y))
         return dX
     return Y, logistic_bwd
-@layerize
-def add_tuples(X, drop=0.):
-    """Give inputs of sequence pairs, where each sequence is (vals, length),
-    sum the values, returning a single sequence.
-    If input is:
-        ((vals1, length), (vals2, length)
-    Output is:
-        (vals1+vals2, length)
-    vals are a single tensor for the whole batch.
-    """
-    (vals1, length1), (vals2, length2) = X
-    assert length1 == length2
-    def add_tuples_bwd(dY, sgd=None):
-        return (dY, dY)
-    return (vals1+vals2, length), add_tuples_bwd
 def _zero_init(model):
     def _zero_init_impl(self, X, y):
         self.W.fill(0)
@@ -115,13 +91,12 @@ def _init_for_precomputed(W, ops):
     nF=Dimension("Number of features"),
     nO=Dimension("Output size"),
     W=Synapses("Weights matrix",
         lambda obj: (obj.nF, obj.nO, obj.nI),
         lambda W, ops: _init_for_precomputed(W, ops)),
     b=Biases("Bias vector",
         lambda obj: (obj.nO,)),
     d_W=Gradient("W"),
-    d_b=Gradient("b")
-)
+    d_b=Gradient("b"))
 class PrecomputableAffine(Model):
     def __init__(self, nO=None, nI=None, nF=None, **kwargs):
         Model.__init__(self, **kwargs)
@@ -134,18 +109,19 @@ class PrecomputableAffine(Model):
         # Yf: (b, f, i)
         # dY: (b, o)
         # dYf: (b, f, o)
-        #Yf = numpy.einsum('bi,foi->bfo', X, self.W)
+        # Yf = numpy.einsum('bi,foi->bfo', X, self.W)
         Yf = self.ops.xp.tensordot(
             X, self.W, axes=[[1], [2]])
         Yf += self.b
         def backward(dY_ids, sgd=None):
             tensordot = self.ops.xp.tensordot
             dY, ids = dY_ids
             Xf = X[ids]
-            #dXf = numpy.einsum('bo,foi->bfi', dY, self.W)
+            # dXf = numpy.einsum('bo,foi->bfi', dY, self.W)
             dXf = tensordot(dY, self.W, axes=[[1], [1]])
-            #dW = numpy.einsum('bo,bfi->ofi', dY, Xf)
+            # dW = numpy.einsum('bo,bfi->ofi', dY, Xf)
             dW = tensordot(dY, Xf, axes=[[0], [0]])
             # ofi -> foi
             self.d_W += dW.transpose((1, 0, 2))
@@ -154,6 +130,7 @@ class PrecomputableAffine(Model):
             if sgd is not None:
                 sgd(self._mem.weights, self._mem.gradient, key=self.id)
             return dXf
         return Yf, backward
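
The commented-out einsum calls above document what the tensordot calls compute. A small
NumPy sketch checking that equivalence, with the shapes from the comments (X is (b, i),
W is (f, o, i), dY is (b, o)):

    import numpy

    b, f, o, i = 2, 3, 4, 5
    rng = numpy.random.RandomState(0)
    X = rng.rand(b, i)
    W = rng.rand(f, o, i)
    dY = rng.rand(b, o)
    Xf = rng.rand(b, f, i)

    Yf = numpy.tensordot(X, W, axes=[[1], [2]])       # (b, f, o)
    assert numpy.allclose(Yf, numpy.einsum('bi,foi->bfo', X, W))

    dXf = numpy.tensordot(dY, W, axes=[[1], [1]])     # (b, f, i)
    assert numpy.allclose(dXf, numpy.einsum('bo,foi->bfi', dY, W))

    dW = numpy.tensordot(dY, Xf, axes=[[0], [0]])     # (o, f, i), transposed to (f, o, i) above
    assert numpy.allclose(dW.transpose((1, 0, 2)), numpy.einsum('bo,bfi->foi', dY, Xf))
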
@@ -164,13 +141,12 @@ class PrecomputableAffine(Model):
     nP=Dimension("Number of pieces"),
     nO=Dimension("Output size"),
     W=Synapses("Weights matrix",
         lambda obj: (obj.nF, obj.nO, obj.nP, obj.nI),
         lambda W, ops: ops.xavier_uniform_init(W)),
     b=Biases("Bias vector",
         lambda obj: (obj.nO, obj.nP)),
     d_W=Gradient("W"),
-    d_b=Gradient("b")
-)
+    d_b=Gradient("b"))
 class PrecomputableMaxouts(Model):
     def __init__(self, nO=None, nI=None, nF=None, nP=3, **kwargs):
         Model.__init__(self, **kwargs)
@@ -186,114 +162,26 @@ class PrecomputableMaxouts(Model):
         # dYp: (b, o, p)
         # W: (f, o, p, i)
         # b: (o, p)
         # bi,opfi->bfop
         # bop,fopi->bfi
         # bop,fbi->opfi : fopi
         tensordot = self.ops.xp.tensordot
-        ascontiguous = self.ops.xp.ascontiguousarray
         Yfp = tensordot(X, self.W, axes=[[1], [3]])
         Yfp += self.b
         def backward(dYp_ids, sgd=None):
             dYp, ids = dYp_ids
             Xf = X[ids]
-            dXf = tensordot(dYp, self.W, axes=[[1, 2], [1,2]])
+            dXf = tensordot(dYp, self.W, axes=[[1, 2], [1, 2]])
             dW = tensordot(dYp, Xf, axes=[[0], [0]])
             self.d_W += dW.transpose((2, 0, 1, 3))
             self.d_b += dYp.sum(axis=0)
             if sgd is not None:
                 sgd(self._mem.weights, self._mem.gradient, key=self.id)
             return dXf
         return Yfp, backward
-# Thinc's Embed class is a bit broken atm, so drop this here.
-from thinc import describe
-from thinc.neural._classes.embed import _uniform_init
-@describe.attributes(
-    nV=describe.Dimension("Number of vectors"),
-    nO=describe.Dimension("Size of output"),
-    vectors=describe.Weights("Embedding table",
-        lambda obj: (obj.nV, obj.nO),
-        _uniform_init(-0.1, 0.1)
-    ),
-    d_vectors=describe.Gradient("vectors")
-)
-class Embed(Model):
-    name = 'embed'
-    def __init__(self, nO, nV=None, **kwargs):
-        if nV is not None:
-            nV += 1
-        Model.__init__(self, **kwargs)
-        if 'name' in kwargs:
-            self.name = kwargs['name']
-        self.column = kwargs.get('column', 0)
-        self.nO = nO
-        self.nV = nV
-    def predict(self, ids):
-        if ids.ndim == 2:
-            ids = ids[:, self.column]
-        return self.ops.xp.ascontiguousarray(self.vectors[ids], dtype='f')
-    def begin_update(self, ids, drop=0.):
-        if ids.ndim == 2:
-            ids = ids[:, self.column]
-        vectors = self.ops.xp.ascontiguousarray(self.vectors[ids], dtype='f')
-        def backprop_embed(d_vectors, sgd=None):
-            n_vectors = d_vectors.shape[0]
-            self.ops.scatter_add(self.d_vectors, ids, d_vectors)
-            if sgd is not None:
-                sgd(self._mem.weights, self._mem.gradient, key=self.id)
-            return None
-        return vectors, backprop_embed
-def HistoryFeatures(nr_class, hist_size=8, nr_dim=8):
-    '''Wrap a model, adding features representing action history.'''
-    if hist_size == 0:
-        return layerize(noop())
-    embed_tables = [Embed(nr_dim, nr_class, column=i, name='embed%d')
-                    for i in range(hist_size)]
-    embed = chain(concatenate(*embed_tables),
-                  LN(Maxout(hist_size*nr_dim, hist_size*nr_dim)))
-    ops = embed.ops
-    def add_history_fwd(vectors_hists, drop=0.):
-        vectors, hist_ids = vectors_hists
-        hist_feats, bp_hists = embed.begin_update(hist_ids, drop=drop)
-        outputs = ops.xp.hstack((vectors, hist_feats))
-        def add_history_bwd(d_outputs, sgd=None):
-            d_vectors = d_outputs[:, :vectors.shape[1]]
-            d_hists = d_outputs[:, vectors.shape[1]:]
-            bp_hists(d_hists, sgd=sgd)
-            return embed.ops.xp.ascontiguousarray(d_vectors)
-        return outputs, add_history_bwd
-    return wrap(add_history_fwd, embed)
-def drop_layer(layer, factor=2.):
-    def drop_layer_fwd(X, drop=0.):
-        if drop <= 0.:
-            return layer.begin_update(X, drop=drop)
-        else:
-            coinflip = layer.ops.xp.random.random()
-            if (coinflip / factor) >= drop:
-                return layer.begin_update(X, drop=drop)
-            else:
-                return X, lambda dX, sgd=None: dX
-    model = wrap(drop_layer_fwd, layer)
-    model.predict = layer
-    return model
 def link_vectors_to_models(vocab):
     vectors = vocab.vectors
@@ -308,16 +196,21 @@ def link_vectors_to_models(vocab):
     # (unideal, I know)
     thinc.extra.load_nlp.VECTORS[(ops.device, VECTORS_KEY)] = data
 def Tok2Vec(width, embed_size, **kwargs):
     pretrained_dims = kwargs.get('pretrained_dims', 0)
     cnn_maxout_pieces = kwargs.get('cnn_maxout_pieces', 2)
     cols = [ID, NORM, PREFIX, SUFFIX, SHAPE, ORTH]
-    with Model.define_operators({'>>': chain, '|': concatenate, '**': clone, '+': add,
-                                 '*': reapply}):
-        norm = HashEmbed(width, embed_size, column=cols.index(NORM), name='embed_norm')
-        prefix = HashEmbed(width, embed_size//2, column=cols.index(PREFIX), name='embed_prefix')
-        suffix = HashEmbed(width, embed_size//2, column=cols.index(SUFFIX), name='embed_suffix')
-        shape = HashEmbed(width, embed_size//2, column=cols.index(SHAPE), name='embed_shape')
+    with Model.define_operators({'>>': chain, '|': concatenate, '**': clone,
+                                 '+': add, '*': reapply}):
+        norm = HashEmbed(width, embed_size, column=cols.index(NORM),
+                         name='embed_norm')
+        prefix = HashEmbed(width, embed_size//2, column=cols.index(PREFIX),
+                           name='embed_prefix')
+        suffix = HashEmbed(width, embed_size//2, column=cols.index(SUFFIX),
+                           name='embed_suffix')
+        shape = HashEmbed(width, embed_size//2, column=cols.index(SHAPE),
+                          name='embed_shape')
         if pretrained_dims is not None and pretrained_dims >= 1:
             glove = StaticVectors(VECTORS_KEY, width, column=cols.index(ID))
@@ -329,7 +222,6 @@ def Tok2Vec(width, embed_size, **kwargs):
                 (norm | prefix | suffix | shape)
                 >> LN(Maxout(width, width*4, pieces=3)), column=5)
         convolution = Residual(
             ExtractWindow(nW=1)
             >> LN(Maxout(width, width*3, pieces=cnn_maxout_pieces))
@@ -354,6 +246,7 @@ def reapply(layer, n_times):
             Y, backprop = layer.begin_update(X, drop=drop)
             X = Y
             backprops.append(backprop)
         def reapply_bwd(dY, sgd=None):
             dX = None
             for backprop in reversed(backprops):
@@ -363,39 +256,20 @@ def reapply(layer, n_times):
                 else:
                     dX += dY
             return dX
         return Y, reapply_bwd
     return wrap(reapply_fwd, layer)
 def asarray(ops, dtype):
     def forward(X, drop=0.):
         return ops.asarray(X, dtype=dtype), None
     return layerize(forward)
-def foreach(layer):
-    def forward(Xs, drop=0.):
-        results = []
-        backprops = []
-        for X in Xs:
-            result, bp = layer.begin_update(X, drop=drop)
-            results.append(result)
-            backprops.append(bp)
-        def backward(d_results, sgd=None):
-            dXs = []
-            for d_result, backprop in zip(d_results, backprops):
-                dXs.append(backprop(d_result, sgd))
-            return dXs
-        return results, backward
-    model = layerize(forward)
-    model._layers.append(layer)
-    return model
 def rebatch(size, layer):
     ops = layer.ops
     def forward(X, drop=0.):
         if X.shape[0] < size:
             return layer.begin_update(X)
@@ -403,6 +277,7 @@ def rebatch(size, layer):
         results, bp_results = zip(*[layer.begin_update(p, drop=drop)
                                     for p in parts])
         y = ops.flatten(results)
         def backward(dy, sgd=None):
             d_parts = [bp(y, sgd=sgd) for bp, y in
                        zip(bp_results, _divide_array(dy, size))]
@@ -413,6 +288,7 @@ def rebatch(size, layer):
             except ValueError:
                 dX = None
             return dX
         return y, backward
     model = layerize(forward)
     model._layers.append(layer)
@@ -423,13 +299,14 @@ def _divide_array(X, size):
     parts = []
     index = 0
     while index < len(X):
-        parts.append(X[index : index + size])
+        parts.append(X[index:index + size])
         index += size
     return parts
 def get_col(idx):
     assert idx >= 0, idx
     def forward(X, drop=0.):
         assert idx >= 0, idx
         if isinstance(X, numpy.ndarray):
@@ -437,30 +314,28 @@ def get_col(idx):
         else:
             ops = CupyOps()
         output = ops.xp.ascontiguousarray(X[:, idx], dtype=X.dtype)
         def backward(y, sgd=None):
             assert idx >= 0, idx
             dX = ops.allocate(X.shape)
             dX[:, idx] += y
             return dX
         return output, backward
     return layerize(forward)
-def zero_init(model):
-    def _hook(self, X, y=None):
-        self.W.fill(0)
-    model.on_data_hooks.append(_hook)
-    return model
 def doc2feats(cols=None):
     if cols is None:
         cols = [ID, NORM, PREFIX, SUFFIX, SHAPE, ORTH]
     def forward(docs, drop=0.):
         feats = []
         for doc in docs:
             feats.append(doc.to_array(cols))
         return feats, None
     model = layerize(forward)
     model.cols = cols
     return model
@@ -474,28 +349,14 @@ def print_shape(prefix):
 @layerize
 def get_token_vectors(tokens_attrs_vectors, drop=0.):
-    ops = Model.ops
     tokens, attrs, vectors = tokens_attrs_vectors
     def backward(d_output, sgd=None):
         return (tokens, d_output)
     return vectors, backward
-@layerize
-def flatten(seqs, drop=0.):
-    if isinstance(seqs[0], numpy.ndarray):
-        ops = NumpyOps()
-    elif hasattr(CupyOps.xp, 'ndarray') and isinstance(seqs[0], CupyOps.xp.ndarray):
-        ops = CupyOps()
-    else:
-        raise ValueError("Unable to flatten sequence of type %s" % type(seqs[0]))
-    lengths = [len(seq) for seq in seqs]
-    def finish_update(d_X, sgd=None):
-        return ops.unflatten(d_X, lengths)
-    X = ops.xp.vstack(seqs)
-    return X, finish_update
 @layerize
 def logistic(X, drop=0.):
     xp = get_array_module(X)
@@ -505,9 +366,11 @@ def logistic(X, drop=0.):
     X = xp.minimum(X, 10., X)
     X = xp.maximum(X, -10., X)
     Y = 1. / (1. + xp.exp(-X))
     def logistic_bwd(dY, sgd=None):
         dX = dY * (Y * (1-Y))
         return dX
     return Y, logistic_bwd
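
The clipping to [-10, 10] above just guards `exp` against overflow before the sigmoid;
the backward pass is the usual sigmoid gradient. A plain-NumPy sketch of the same idea:

    import numpy

    def logistic(X):
        X = numpy.clip(X, -10., 10.)
        Y = 1. / (1. + numpy.exp(-X))

        def logistic_bwd(dY):
            # d sigmoid(x) / dx = sigmoid(x) * (1 - sigmoid(x))
            return dY * (Y * (1 - Y))

        return Y, logistic_bwd

    Y, bp = logistic(numpy.array([-50., 0., 50.]))
    # Outputs are squashed into (0, 1); extreme inputs saturate near 0 or 1.
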
@@ -517,6 +380,7 @@ def zero_init(model):
     model.on_data_hooks.append(_zero_init_impl)
     return model
 @layerize
 def preprocess_doc(docs, drop=0.):
     keys = [doc.to_array([LOWER]) for doc in docs]
@@ -526,11 +390,13 @@ def preprocess_doc(docs, drop=0.):
     vals = ops.allocate(keys.shape[0]) + 1
     return (keys, vals, lengths), None
 def getitem(i):
     def getitem_fwd(X, drop=0.):
         return X[i], None
     return layerize(getitem_fwd)
 def build_tagger_model(nr_class, **cfg):
     embed_size = util.env_opt('embed_size', 7000)
     if 'token_vector_width' in cfg:
@@ -555,8 +421,6 @@ def build_tagger_model(nr_class, **cfg):
 @layerize
 def SpacyVectors(docs, drop=0.):
-    xp = get_array_module(docs[0].vocab.vectors.data)
-    width = docs[0].vocab.vectors.data.shape[1]
     batch = []
     for doc in docs:
         indices = numpy.zeros((len(doc),), dtype='i')
@@ -570,29 +434,6 @@ def SpacyVectors(docs, drop=0.):
     return batch, None
-def foreach(layer, drop_factor=1.0):
-    '''Map a layer across elements in a list'''
-    def foreach_fwd(Xs, drop=0.):
-        drop *= drop_factor
-        ys = []
-        backprops = []
-        for X in Xs:
-            y, bp_y = layer.begin_update(X, drop=drop)
-            ys.append(y)
-            backprops.append(bp_y)
-        def foreach_bwd(d_ys, sgd=None):
-            d_Xs = []
-            for d_y, bp_y in zip(d_ys, backprops):
-                if bp_y is not None and bp_y is not None:
-                    d_Xs.append(d_y, sgd=sgd)
-                else:
-                    d_Xs.append(None)
-            return d_Xs
-        return ys, foreach_bwd
-    model = wrap(foreach_fwd, layer)
-    return model
 def build_text_classifier(nr_class, width=64, **cfg):
     nr_vector = cfg.get('nr_vector', 5000)
     pretrained_dims = cfg.get('pretrained_dims', 0)
@@ -602,9 +443,7 @@ def build_text_classifier(nr_class, width=64, **cfg):
             model = (
                 SpacyVectors
                 >> flatten_add_lengths
-                >> with_getitem(0,
-                    Affine(width, pretrained_dims)
-                )
+                >> with_getitem(0, Affine(width, pretrained_dims))
                 >> ParametricAttention(width)
                 >> Pooling(sum_pool)
                 >> Residual(ReLu(width, width)) ** 2
@@ -613,7 +452,6 @@ def build_text_classifier(nr_class, width=64, **cfg):
             )
             return model
         lower = HashEmbed(width, nr_vector, column=1)
         prefix = HashEmbed(width//2, nr_vector, column=2)
         suffix = HashEmbed(width//2, nr_vector, column=3)
@@ -671,33 +509,40 @@ def build_text_classifier(nr_class, width=64, **cfg):
     model.lsuv = False
     return model
 @layerize
 def flatten(seqs, drop=0.):
     ops = Model.ops
     lengths = ops.asarray([len(seq) for seq in seqs], dtype='i')
     def finish_update(d_X, sgd=None):
         return ops.unflatten(d_X, lengths, pad=0)
     X = ops.flatten(seqs, pad=0)
     return X, finish_update
 def concatenate_lists(*layers, **kwargs):  # pragma: no cover
-    '''Compose two or more models `f`, `g`, etc, such that their outputs are
+    """Compose two or more models `f`, `g`, etc, such that their outputs are
     concatenated, i.e. `concatenate(f, g)(x)` computes `hstack(f(x), g(x))`
-    '''
+    """
     if not layers:
         return noop()
     drop_factor = kwargs.get('drop_factor', 1.0)
     ops = layers[0].ops
     layers = [chain(layer, flatten) for layer in layers]
     concat = concatenate(*layers)
     def concatenate_lists_fwd(Xs, drop=0.):
         drop *= drop_factor
         lengths = ops.asarray([len(X) for X in Xs], dtype='i')
         flat_y, bp_flat_y = concat.begin_update(Xs, drop=drop)
         ys = ops.unflatten(flat_y, lengths)
         def concatenate_lists_bwd(d_ys, sgd=None):
             return bp_flat_y(ops.flatten(d_ys), sgd=sgd)
         return ys, concatenate_lists_bwd
     model = wrap(concatenate_lists_fwd, concat)
     return model
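
`concatenate_lists` works on lists of per-document arrays: each wrapped layer is applied
to the flattened batch, the outputs are concatenated feature-wise, and the result is split
back into per-document arrays. A rough NumPy sketch of the forward behaviour, using
hypothetical stand-in functions `f` and `g` instead of thinc layers:

    import numpy

    def concatenate_lists_fwd(Xs, f, g):
        lengths = [len(X) for X in Xs]
        flat = numpy.concatenate(Xs, axis=0)
        # hstack(f(x), g(x)), computed over the whole flattened batch at once
        flat_y = numpy.hstack((f(flat), g(flat)))
        return numpy.split(flat_y, numpy.cumsum(lengths)[:-1], axis=0)

    docs = [numpy.ones((3, 4)), numpy.ones((5, 4))]
    ys = concatenate_lists_fwd(docs, lambda x: x * 2., lambda x: x + 1.)
    assert [y.shape for y in ys] == [(3, 8), (5, 8)]
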

View File

@@ -49,9 +49,8 @@ from .. import util
 from ..util import get_async, get_cuda_stream
 from .._ml import zero_init, PrecomputableAffine, PrecomputableMaxouts
 from .._ml import Tok2Vec, doc2feats, rebatch
-from .._ml import Residual, drop_layer, flatten
+from .._ml import Residual, flatten
 from .._ml import link_vectors_to_models
-from .._ml import HistoryFeatures
 from ..compat import json_dumps, copy_array
 from .stateclass cimport StateClass
@@ -77,7 +76,7 @@ def set_debug(val):
 cdef class precompute_hiddens:
-    '''Allow a model to be "primed" by pre-computing input features in bulk.
+    """Allow a model to be "primed" by pre-computing input features in bulk.
     This is used for the parser, where we want to take a batch of documents,
     and compute vectors for each (token, position) pair. These vectors can then
@@ -92,7 +91,7 @@ cdef class precompute_hiddens:
     so we can save the factor k. This also gives a nice CPU/GPU division:
     we can do all our hard maths up front, packed into large multiplications,
     and do the hard-to-program parsing on the CPU.
-    '''
+    """
     cdef int nF, nO, nP
     cdef bint _is_synchronized
     cdef public object ops
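
The idea the docstring describes: the affine transform of each (token, feature-slot) pair
is computed once for the whole batch, so scoring a parse state only needs to gather and
sum nF precomputed rows instead of redoing the multiplication. A rough NumPy illustration
(shapes and names are illustrative, not the actual Cython implementation):

    import numpy

    nr_token, nI, nF, nO = 50, 128, 8, 64
    rng = numpy.random.RandomState(0)
    tokens = rng.rand(nr_token, nI)      # token vectors for the whole batch
    W = rng.rand(nF, nO, nI)

    # Expensive part, done once per batch: per-token, per-feature-slot outputs.
    cached = numpy.tensordot(tokens, W, axes=[[1], [2]])   # (nr_token, nF, nO)

    # Cheap part, done once per parse state: gather the nF context tokens and sum.
    state_token_ids = numpy.array([0, 3, 7, 7, 12, 2, 9, 1])
    hidden = cached[state_token_ids, numpy.arange(nF)].sum(axis=0)   # (nO,)
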
@@ -280,23 +279,19 @@ cdef class Parser:
         return (tok2vec, lower, upper), cfg
     def __init__(self, Vocab vocab, moves=True, model=True, **cfg):
-        """
-        Create a Parser.
-        Arguments:
-            vocab (Vocab):
-                The vocabulary object. Must be shared with documents to be processed.
-                The value is set to the .vocab attribute.
-            moves (TransitionSystem):
-                Defines how the parse-state is created, updated and evaluated.
-                The value is set to the .moves attribute unless True (default),
-                in which case a new instance is created with Parser.Moves().
-            model (object):
-                Defines how the parse-state is created, updated and evaluated.
-                The value is set to the .model attribute unless True (default),
-                in which case a new instance is created with Parser.Model().
-            **cfg:
-                Arbitrary configuration parameters. Set to the .cfg attribute
+        """Create a Parser.
+        vocab (Vocab): The vocabulary object. Must be shared with documents
+            to be processed. The value is set to the `.vocab` attribute.
+        moves (TransitionSystem): Defines how the parse-state is created,
+            updated and evaluated. The value is set to the .moves attribute
+            unless True (default), in which case a new instance is created with
+            `Parser.Moves()`.
+        model (object): Defines how the parse-state is created, updated and
+            evaluated. The value is set to the .model attribute unless True
+            (default), in which case a new instance is created with
+            `Parser.Model()`.
+        **cfg: Arbitrary configuration parameters. Set to the `.cfg` attribute
         """
         self.vocab = vocab
         if moves is True:
@@ -322,13 +317,10 @@ cdef class Parser:
         return (Parser, (self.vocab, self.moves, self.model), None, None)
     def __call__(self, Doc doc, beam_width=None, beam_density=None):
-        """
-        Apply the parser or entity recognizer, setting the annotations onto the Doc object.
-        Arguments:
-            doc (Doc): The document to be processed.
-        Returns:
-            None
+        """Apply the parser or entity recognizer, setting the annotations onto
+        the `Doc` object.
+        doc (Doc): The document to be processed.
         """
         if beam_width is None:
             beam_width = self.cfg.get('beam_width', 1)
@@ -350,16 +342,13 @@ cdef class Parser:
     def pipe(self, docs, int batch_size=256, int n_threads=2,
              beam_width=None, beam_density=None):
-        """
-        Process a stream of documents.
-        Arguments:
-            stream: The sequence of documents to process.
-            batch_size (int):
-                The number of documents to accumulate into a working set.
-            n_threads (int):
-                The number of threads with which to work on the buffer in parallel.
-        Yields (Doc): Documents, in order.
+        """Process a stream of documents.
+        stream: The sequence of documents to process.
+        batch_size (int): Number of documents to accumulate into a working set.
+        n_threads (int): The number of threads with which to work on the buffer
+            in parallel.
+        YIELDS (Doc): Documents, in order.
         """
         if beam_width is None:
             beam_width = self.cfg.get('beam_width', 1)
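
For context, the two entry points documented above are normally reached through the
spaCy 2.x pipeline API. A hedged usage sketch (the model name is an assumption; any
installed English model should behave the same way):

    import spacy

    nlp = spacy.load('en_core_web_sm')
    parser = nlp.get_pipe('parser')          # a Parser instance as defined above

    doc = nlp(u'This is a sentence.')        # full pipeline, including the parser
    print([(w.text, w.dep_, w.head.text) for w in doc])

    # Or run the parser component directly on pre-tokenized Docs:
    docs = [nlp.make_doc(u'Another example sentence.') for _ in range(10)]
    for doc in parser.pipe(docs, batch_size=4):
        pass                                 # Docs are yielded in order
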