Resolve conflicts when merging new beam parsing stuff

Matthew Honnibal 2017-08-18 13:38:32 -05:00
commit 426f84937f
10 changed files with 448 additions and 28 deletions

View File

@@ -3,7 +3,7 @@ pathlib
 numpy>=1.7
 cymem>=1.30,<1.32
 preshed>=1.0.0,<2.0.0
-thinc>=6.8.0,<6.9.0
+thinc>=6.8.1,<6.9.0
 murmurhash>=0.28,<0.29
 plac<1.0.0,>=0.9.6
 six

View File

@@ -193,7 +193,7 @@ def setup_package():
         'murmurhash>=0.28,<0.29',
         'cymem>=1.30,<1.32',
         'preshed>=1.0.0,<2.0.0',
-        'thinc>=6.8.0,<6.9.0',
+        'thinc>=6.8.1,<6.9.0',
         'plac<1.0.0,>=0.9.6',
         'pip>=9.0.0,<10.0.0',
         'six',

View File

@@ -8,7 +8,8 @@ import random
 from thinc.neural._classes.convolution import ExtractWindow
 from thinc.neural._classes.static_vectors import StaticVectors
-from thinc.neural._classes.batchnorm import BatchNorm
+from thinc.neural._classes.batchnorm import BatchNorm as BN
+from thinc.neural._classes.layernorm import LayerNorm as LN
 from thinc.neural._classes.resnet import Residual
 from thinc.neural import ReLu
 from thinc.neural._classes.selu import SELU
@@ -19,7 +20,9 @@ from thinc.api import FeatureExtracter, with_getitem
 from thinc.neural.pooling import Pooling, max_pool, mean_pool, sum_pool
 from thinc.neural._classes.attention import ParametricAttention
 from thinc.linear.linear import LinearModel
-from thinc.api import uniqued, wrap
+from thinc.api import uniqued, wrap, flatten_add_lengths
+from thinc.neural._classes.rnn import BiLSTM
 from .attrs import ID, ORTH, LOWER, NORM, PREFIX, SUFFIX, SHAPE, TAG, DEP
 from .tokens.doc import Doc
@@ -192,17 +195,17 @@ def Tok2Vec(width, embed_size, preprocess=None):
     suffix = get_col(cols.index(SUFFIX)) >> HashEmbed(width, embed_size//2, name='embed_suffix')
     shape = get_col(cols.index(SHAPE)) >> HashEmbed(width, embed_size//2, name='embed_shape')
-    embed = (norm | prefix | suffix | shape )
+    embed = (norm | prefix | suffix | shape ) >> LN(Maxout(width, width*4, pieces=3))
     tok2vec = (
         with_flatten(
             asarray(Model.ops, dtype='uint64')
-            >> embed
-            >> Maxout(width, width*4, pieces=3)
-            >> Residual(ExtractWindow(nW=1) >> Maxout(width, width*3))
-            >> Residual(ExtractWindow(nW=1) >> Maxout(width, width*3))
-            >> Residual(ExtractWindow(nW=1) >> Maxout(width, width*3))
-            >> Residual(ExtractWindow(nW=1) >> Maxout(width, width*3)),
-            pad=4)
+            >> uniqued(embed, column=5)
+            >> drop_layer(
+                Residual(
+                    (ExtractWindow(nW=1) >> BN(Maxout(width, width*3)))
+                )
+            ) ** 4, pad=4
+        )
     )
     if preprocess not in (False, None):
         tok2vec = preprocess >> tok2vec
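
The uniqued(embed, column=5) wrapper evaluates the embedding once per unique key in the batch and scatters the result back to every occurrence of that key. A minimal numpy sketch of that idea, using toy data and hypothetical helper names rather than the thinc implementation:

import numpy as np

def embed_rows(keys, table):
    # Stand-in for an embedding layer: one vector per integer key.
    return table[keys]

def uniqued_embed(keys, table):
    # Evaluate the embedding once per unique key, then scatter the
    # vectors back to the original positions.
    uniq, inverse = np.unique(keys, return_inverse=True)
    return embed_rows(uniq, table)[inverse]

table = np.random.rand(10, 4)
keys = np.array([3, 3, 7, 3, 1])
assert np.allclose(uniqued_embed(keys, table), embed_rows(keys, table))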

View File

@@ -654,14 +654,6 @@ cdef class NeuralEntityRecognizer(NeuralParser):
     nr_feature = 6

-    def predict_confidences(self, docs):
-        tensors = [d.tensor for d in docs]
-        samples = []
-        for i in range(10):
-            states = self.parse_batch(docs, tensors, drop=0.3)
-            for state in states:
-                samples.append(self._get_entities(state))
-
     def __reduce__(self):
         return (NeuralEntityRecognizer, (self.vocab, self.moves, self.model), None, None)

View File

@@ -0,0 +1,286 @@
# cython: infer_types=True
# cython: profile=True
cimport numpy as np
import numpy
from cpython.ref cimport PyObject, Py_INCREF, Py_XDECREF
from thinc.extra.search cimport Beam
from thinc.extra.search import MaxViolation
from thinc.typedefs cimport hash_t, class_t
from thinc.extra.search cimport MaxViolation
from .transition_system cimport TransitionSystem, Transition
from .stateclass cimport StateClass
from ..gold cimport GoldParse
from ..tokens.doc cimport Doc
# These are passed as callbacks to thinc.extra.search.Beam
cdef int _transition_state(void* _dest, void* _src, class_t clas, void* _moves) except -1:
dest = <StateClass>_dest
src = <StateClass>_src
moves = <const Transition*>_moves
dest.clone(src)
moves[clas].do(dest.c, moves[clas].label)
cdef int _check_final_state(void* _state, void* extra_args) except -1:
return (<StateClass>_state).is_final()
def _cleanup(Beam beam):
for i in range(beam.width):
Py_XDECREF(<PyObject*>beam._states[i].content)
Py_XDECREF(<PyObject*>beam._parents[i].content)
cdef hash_t _hash_state(void* _state, void* _) except 0:
state = <StateClass>_state
if state.c.is_final():
return 1
else:
return state.c.hash()
cdef class ParserBeam(object):
cdef public TransitionSystem moves
cdef public object states
cdef public object golds
cdef public object beams
cdef public object dones
def __init__(self, TransitionSystem moves, states, golds,
int width, float density):
self.moves = moves
self.states = states
self.golds = golds
self.beams = []
cdef Beam beam
cdef StateClass state, st
for state in states:
beam = Beam(self.moves.n_moves, width, density)
beam.initialize(self.moves.init_beam_state, state.c.length, state.c._sent)
for i in range(beam.width):
st = <StateClass>beam.at(i)
st.c.offset = state.c.offset
self.beams.append(beam)
self.dones = [False] * len(self.beams)
def __dealloc__(self):
if self.beams is not None:
for beam in self.beams:
if beam is not None:
_cleanup(beam)
@property
def is_done(self):
return all(b.is_done or self.dones[i] for i, b in enumerate(self.beams))
def __getitem__(self, i):
return self.beams[i]
def __len__(self):
return len(self.beams)
def advance(self, scores, follow_gold=False):
cdef Beam beam
for i, beam in enumerate(self.beams):
if beam.is_done or not scores[i].size or self.dones[i]:
continue
self._set_scores(beam, scores[i])
if self.golds is not None:
self._set_costs(beam, self.golds[i], follow_gold=follow_gold)
if follow_gold:
beam.advance(_transition_state, NULL, <void*>self.moves.c)
else:
beam.advance(_transition_state, _hash_state, <void*>self.moves.c)
beam.check_done(_check_final_state, NULL)
if beam.is_done and self.golds is not None:
for j in range(beam.size):
state = <StateClass>beam.at(j)
if state.is_final():
try:
if self.moves.is_gold_parse(state, self.golds[i]):
beam._states[j].loss = 0.0
elif beam._states[j].loss == 0.0:
beam._states[j].loss = 1.0
except NotImplementedError:
break
def _set_scores(self, Beam beam, float[:, ::1] scores):
cdef float* c_scores = &scores[0, 0]
cdef int nr_state = min(scores.shape[0], beam.size)
cdef int nr_class = scores.shape[1]
for i in range(nr_state):
state = <StateClass>beam.at(i)
if not state.is_final():
for j in range(nr_class):
beam.scores[i][j] = c_scores[i * nr_class + j]
self.moves.set_valid(beam.is_valid[i], state.c)
else:
for j in range(beam.nr_class):
beam.scores[i][j] = 0
beam.costs[i][j] = 0
def _set_costs(self, Beam beam, GoldParse gold, int follow_gold=False):
for i in range(beam.size):
state = <StateClass>beam.at(i)
if not state.c.is_final():
self.moves.set_costs(beam.is_valid[i], beam.costs[i], state, gold)
if follow_gold:
for j in range(beam.nr_class):
if beam.costs[i][j] >= 1:
beam.is_valid[i][j] = 0
def get_token_ids(states, int n_tokens):
cdef StateClass state
cdef np.ndarray ids = numpy.zeros((len(states), n_tokens),
dtype='int32', order='C')
c_ids = <int*>ids.data
for i, state in enumerate(states):
if not state.is_final():
state.c.set_context_tokens(c_ids, n_tokens)
else:
ids[i] = -1
c_ids += ids.shape[1]
return ids
nr_update = 0
def update_beam(TransitionSystem moves, int nr_feature, int max_steps,
states, tokvecs, golds,
state2vec, vec2scores,
int width, float density,
sgd=None, losses=None, drop=0.):
global nr_update
cdef MaxViolation violn
nr_update += 1
pbeam = ParserBeam(moves, states, golds,
width=width, density=density)
gbeam = ParserBeam(moves, states, golds,
width=width, density=0.0)
cdef StateClass state
beam_maps = []
backprops = []
violns = [MaxViolation() for _ in range(len(states))]
for t in range(max_steps):
if pbeam.is_done and gbeam.is_done:
break
# The beam maps let us find the right row in the flattened scores
# arrays for each state. States are identified by (example id, history).
# We keep a different beam map for each step (since we'll have a flat
# scores array for each step). The beam map will let us take the per-state
# losses, and compute the gradient for each (step, state, class).
beam_maps.append({})
# Gather all states from the two beams in a list. Some states may occur
# in both beams. To figure out which beam each state belongs to,
# we keep two lists of indices, p_indices and g_indices.
states, p_indices, g_indices = get_states(pbeam, gbeam, beam_maps[-1], nr_update)
if not states:
break
# Now that we have our flat list of states, feed them through the model
token_ids = get_token_ids(states, nr_feature)
vectors, bp_vectors = state2vec.begin_update(token_ids, drop=drop)
scores, bp_scores = vec2scores.begin_update(vectors, drop=drop)
# Store the callbacks for the backward pass
backprops.append((token_ids, bp_vectors, bp_scores))
# Unpack the flat scores into lists for the two beams. The indices arrays
# tell us which example and state the scores-row refers to.
p_scores = [numpy.ascontiguousarray(scores[indices], dtype='f') for indices in p_indices]
g_scores = [numpy.ascontiguousarray(scores[indices], dtype='f') for indices in g_indices]
# Now advance the states in the beams. The gold beam is constrained
# to follow only gold analyses.
pbeam.advance(p_scores)
gbeam.advance(g_scores, follow_gold=True)
# Track the "maximum violation", to use in the update.
for i, violn in enumerate(violns):
violn.check_crf(pbeam[i], gbeam[i])
histories = []
losses = []
for violn in violns:
if violn.p_hist:
histories.append(violn.p_hist + violn.g_hist)
losses.append(violn.p_probs + violn.g_probs)
else:
histories.append([])
losses.append([])
states_d_scores = get_gradient(moves.n_moves, beam_maps, histories, losses)
return states_d_scores, backprops[:len(states_d_scores)]
def get_states(pbeams, gbeams, beam_map, nr_update):
seen = {}
states = []
p_indices = []
g_indices = []
cdef Beam pbeam, gbeam
assert len(pbeams) == len(gbeams)
for eg_id, (pbeam, gbeam) in enumerate(zip(pbeams, gbeams)):
p_indices.append([])
g_indices.append([])
for i in range(pbeam.size):
state = <StateClass>pbeam.at(i)
if not state.is_final():
key = tuple([eg_id] + pbeam.histories[i])
assert key not in seen, (key, seen)
seen[key] = len(states)
p_indices[-1].append(len(states))
states.append(state)
beam_map.update(seen)
for i in range(gbeam.size):
state = <StateClass>gbeam.at(i)
if not state.is_final():
key = tuple([eg_id] + gbeam.histories[i])
if key in seen:
g_indices[-1].append(seen[key])
else:
g_indices[-1].append(len(states))
beam_map[key] = len(states)
states.append(state)
p_idx = [numpy.asarray(idx, dtype='i') for idx in p_indices]
g_idx = [numpy.asarray(idx, dtype='i') for idx in g_indices]
return states, p_idx, g_idx
def get_gradient(nr_class, beam_maps, histories, losses):
"""
The global model assigns a loss to each parse. The beam scores
are additive, so the same gradient is applied to each action
in the history. This gives the gradient of a single *action*
for a beam state -- so we have "the gradient of loss for taking
action i given history H."
Histories: Each history is a list of actions.
Each candidate has a history.
Each beam has multiple candidates.
Each batch has multiple beams.
So histories is a list of lists of lists of ints.
"""
nr_step = len(beam_maps)
grads = []
nr_step = 0
for eg_id, hists in enumerate(histories):
for loss, hist in zip(losses[eg_id], hists):
if loss != 0.0 and not numpy.isnan(loss):
nr_step = max(nr_step, len(hist))
for i in range(nr_step):
grads.append(numpy.zeros((max(beam_maps[i].values())+1, nr_class), dtype='f'))
assert len(histories) == len(losses)
for eg_id, hists in enumerate(histories):
for loss, hist in zip(losses[eg_id], hists):
if loss == 0.0 or numpy.isnan(loss):
continue
key = tuple([eg_id])
# Adjust loss for length
avg_loss = loss / len(hist)
loss += avg_loss * (nr_step - len(hist))
for j, clas in enumerate(hist):
i = beam_maps[j][key]
# In step j, at state i action clas
# resulted in loss
grads[j][i, clas] += loss
key = key + tuple([clas])
return grads
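
The bookkeeping in get_gradient is easier to see on a toy case: one example, two surviving candidates, and the same loss added to every (step, state, action) cell along each candidate's history. A small numpy sketch with made-up numbers (it omits the length adjustment that get_gradient applies):

import numpy as np

nr_class = 5
histories = [[2, 0, 3], [2, 4, 3]]   # actions taken at steps 0, 1, 2
losses = [0.75, -0.75]               # made-up losses; gold-side entries are negative
beam_maps = [
    {(0,): 0},                       # step 0: both candidates share the root state
    {(0, 2): 0},                     # step 1: the state reached after action 2
    {(0, 2, 0): 0, (0, 2, 4): 1},    # step 2: the two candidates diverge
]

grads = [np.zeros((max(m.values()) + 1, nr_class), dtype='f') for m in beam_maps]
for hist, loss in zip(histories, losses):
    key = (0,)                       # (example id,) plus the growing action history
    for step, clas in enumerate(hist):
        row = beam_maps[step][key]
        grads[step][row, clas] += loss   # the same loss is applied at every step
        key = key + (clas,)
print(grads[2])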

View File

@@ -73,7 +73,16 @@ cdef cppclass StateC:
         free(this.shifted - PADDING)

     void set_context_tokens(int* ids, int n) nogil:
-        if n == 13:
+        if n == 8:
+            ids[0] = this.B(0)
+            ids[1] = this.B(1)
+            ids[2] = this.S(0)
+            ids[3] = this.S(1)
+            ids[4] = this.H(this.S(0))
+            ids[5] = this.L(this.B(0), 1)
+            ids[6] = this.L(this.S(0), 2)
+            ids[7] = this.R(this.S(0), 1)
+        elif n == 13:
             ids[0] = this.B(0)
             ids[1] = this.B(1)
             ids[2] = this.S(0)

View File

@@ -351,6 +351,20 @@ cdef class ArcEager(TransitionSystem):
         def __get__(self):
             return (SHIFT, REDUCE, LEFT, RIGHT, BREAK)

+    def is_gold_parse(self, StateClass state, GoldParse gold):
+        predicted = set()
+        truth = set()
+        for i in range(gold.length):
+            if gold.cand_to_gold[i] is None:
+                continue
+            if state.safe_get(i).dep:
+                predicted.add((i, state.H(i), self.strings[state.safe_get(i).dep]))
+            else:
+                predicted.add((i, state.H(i), 'ROOT'))
+            id_, word, tag, head, dep, ner = gold.orig_annot[gold.cand_to_gold[i]]
+            truth.add((id_, head, dep))
+        return truth == predicted
+
     def has_gold(self, GoldParse gold, start=0, end=None):
         end = end or len(gold.heads)
         if all([tag is None for tag in gold.heads[start:end]]):
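
is_gold_parse reduces the check to set equality over (token, head, label) arcs. A pure-Python sketch of the same comparison on toy arrays, ignoring the candidate-to-gold alignment handled in the method above:

def arcs_match(pred_heads, pred_deps, gold_heads, gold_deps):
    # Compare predicted and gold (token, head, label) arcs as sets.
    predicted = {(i, h, d) for i, (h, d) in enumerate(zip(pred_heads, pred_deps))}
    truth = {(i, h, d) for i, (h, d) in enumerate(zip(gold_heads, gold_deps))}
    return predicted == truth

assert arcs_match([1, 1], ['nsubj', 'ROOT'], [1, 1], ['nsubj', 'ROOT'])
assert not arcs_match([1, 1], ['dobj', 'ROOT'], [1, 1], ['nsubj', 'ROOT'])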

View File

@@ -107,7 +107,7 @@ cdef class BeamParser(Parser):
         # The non-monotonic oracle makes it difficult to ensure final costs are
         # correct. Therefore do final correction
         for i in range(pred.size):
-            if is_gold(<StateClass>pred.at(i), gold_parse, self.moves.strings):
+            if self.moves.is_gold_parse(<StateClass>pred.at(i), gold_parse):
                 pred._states[i].loss = 0.0
             elif pred._states[i].loss == 0.0:
                 pred._states[i].loss = 1.0
@@ -213,7 +213,7 @@ def _check_train_integrity(Beam pred, Beam gold, GoldParse gold_parse, Transitio
         if not pred._states[i].is_done or pred._states[i].loss == 0:
             continue
         state = <StateClass>pred.at(i)
-        if is_gold(state, gold_parse, moves.strings) == True:
+        if moves.is_gold_parse(state, gold_parse) == True:
             for dep in gold_parse.orig_annot:
                 print(dep[1], dep[3], dep[4])
             print("Cost", pred._states[i].loss)
@@ -227,7 +227,7 @@ def _check_train_integrity(Beam pred, Beam gold, GoldParse gold_parse, Transitio
         if not gold._states[i].is_done:
             continue
         state = <StateClass>gold.at(i)
-        if is_gold(state, gold_parse, moves.strings) == False:
+        if moves.is_gold(state, gold_parse) == False:
             print("Truth")
             for dep in gold_parse.orig_annot:
                 print(dep[1], dep[3], dep[4])

View File

@@ -37,7 +37,14 @@ from preshed.maps cimport MapStruct
 from preshed.maps cimport map_get

 from thinc.api import layerize, chain, noop, clone
+<<<<<<< HEAD
 from thinc.neural import Model, Affine, ELU, ReLu, Maxout
+=======
+from thinc.neural import Model, Affine, ReLu, Maxout
+from thinc.neural._classes.batchnorm import BatchNorm as BN
+from thinc.neural._classes.selu import SELU
+from thinc.neural._classes.layernorm import LayerNorm
+>>>>>>> feature/nn-beam-parser
 from thinc.neural.ops import NumpyOps, CupyOps
 from thinc.neural.util import get_array_module
@@ -61,6 +68,10 @@ from ..strings cimport StringStore
 from ..gold cimport GoldParse
 from ..attrs cimport TAG, DEP

+<<<<<<< HEAD
+=======
+USE_FINE_TUNE = True
+>>>>>>> feature/nn-beam-parser

 def get_templates(*args, **kwargs):
     return []
@@ -248,6 +259,7 @@ cdef class Parser:
                               nI=token_vector_width)

         with Model.use_device('cpu'):
+<<<<<<< HEAD
             if depth == 0:
                 upper = chain()
                 upper.is_noop = True
@@ -257,6 +269,12 @@
                     zero_init(Affine(nr_class, drop_factor=0.0))
                 )
                 upper.is_noop = False
+=======
+            upper = chain(
+                clone(Maxout(hidden_width), (depth-1)),
+                zero_init(Affine(nr_class, drop_factor=0.0))
+            )
+>>>>>>> feature/nn-beam-parser
         # TODO: This is an unfortunate hack atm!
         # Used to set input dimensions in network.
         lower.begin_training(lower.ops.allocate((500, token_vector_width)))
@@ -294,6 +312,10 @@ cdef class Parser:
             self.moves = self.TransitionSystem(self.vocab.strings, {})
         else:
             self.moves = moves
+        if 'beam_width' not in cfg:
+            cfg['beam_width'] = util.env_opt('beam_width', 1)
+        if 'beam_density' not in cfg:
+            cfg['beam_density'] = util.env_opt('beam_density', 0.0)
         self.cfg = cfg
         if 'actions' in self.cfg:
             for action, labels in self.cfg.get('actions', {}).items():
@@ -316,7 +338,7 @@
         if beam_width is None:
             beam_width = self.cfg.get('beam_width', 1)
         if beam_density is None:
-            beam_density = self.cfg.get('beam_density', 0.001)
+            beam_density = self.cfg.get('beam_density', 0.0)
         cdef Beam beam
         if beam_width == 1:
             states = self.parse_batch([doc], [doc.tensor])
@@ -332,7 +354,7 @@
         return output

     def pipe(self, docs, int batch_size=1000, int n_threads=2,
-             beam_width=1, beam_density=0.001):
+             beam_width=None, beam_density=None):
         """
         Process a stream of documents.

@@ -345,6 +367,10 @@
             Yields (Doc): Documents, in order.
         """
         cdef StateClass parse_state
+        if beam_width is None:
+            beam_width = self.cfg.get('beam_width', 1)
+        if beam_density is None:
+            beam_density = self.cfg.get('beam_density', 0.0)
         cdef Doc doc
         queue = []
         for docs in cytoolz.partition_all(batch_size, docs):
@@ -396,6 +422,7 @@
         c_is_valid = <int*>is_valid.data
         cdef int has_hidden = not getattr(vec2scores, 'is_noop', False)
         while not next_step.empty():
+<<<<<<< HEAD
             if not has_hidden:
                 for i in cython.parallel.prange(
                         next_step.size(), num_threads=6, nogil=True):
@@ -415,6 +442,21 @@
                         &c_scores[i*nr_class], &c_is_valid[i*nr_class], nr_class)
                     action = self.moves.c[guess]
                     action.do(st, action.label)
+=======
+            for i in range(next_step.size()):
+                st = next_step[i]
+                st.set_context_tokens(&c_token_ids[i*nr_feat], nr_feat)
+                self.moves.set_valid(&c_is_valid[i*nr_class], st)
+            vectors = state2vec(token_ids[:next_step.size()])
+            scores = vec2scores(vectors)
+            c_scores = <float*>scores.data
+            for i in range(next_step.size()):
+                st = next_step[i]
+                guess = arg_max_if_valid(
+                    &c_scores[i*nr_class], &c_is_valid[i*nr_class], nr_class)
+                action = self.moves.c[guess]
+                action.do(st, action.label)
+>>>>>>> feature/nn-beam-parser
             this_step, next_step = next_step, this_step
             next_step.clear()
             for st in this_step:
@@ -422,7 +464,7 @@
                 next_step.push_back(st)
         return states

-    def beam_parse(self, docs, tokvecses, int beam_width=8, float beam_density=0.001):
+    def beam_parse(self, docs, tokvecses, int beam_width=3, float beam_density=0.001):
         cdef Beam beam
         cdef np.ndarray scores
         cdef Doc doc
@@ -484,6 +526,13 @@
             free(token_ids)

     def update(self, docs_tokvecs, golds, drop=0., sgd=None, losses=None):
+<<<<<<< HEAD
+=======
+        if self.cfg.get('beam_width', 1) >= 2 and numpy.random.random() >= 0.5:
+            return self.update_beam(docs_tokvecs, golds,
+                    self.cfg['beam_width'], self.cfg['beam_density'],
+                    drop=drop, sgd=sgd, losses=losses)
+>>>>>>> feature/nn-beam-parser
         if losses is not None and self.name not in losses:
             losses[self.name] = 0.
         docs, tokvec_lists = docs_tokvecs
@@ -540,7 +589,64 @@
                 break
         self._make_updates(d_tokvecs,
             backprops, sgd, cuda_stream)
+<<<<<<< HEAD
         return self.model[0].ops.unflatten(d_tokvecs, [len(d) for d in docs])
+=======
+        d_tokvecs = self.model[0].ops.unflatten(d_tokvecs, [len(d) for d in docs])
+        if USE_FINE_TUNE:
+            bp_my_tokvecs(d_tokvecs, sgd=sgd)
+        return d_tokvecs
+
+    def update_beam(self, docs_tokvecs, golds, width=None, density=None,
+            drop=0., sgd=None, losses=None):
+        if width is None:
+            width = self.cfg.get('beam_width', 2)
+        if density is None:
+            density = self.cfg.get('beam_density', 0.0)
+        if losses is not None and self.name not in losses:
+            losses[self.name] = 0.
+        docs, tokvecs = docs_tokvecs
+        lengths = [len(d) for d in docs]
+        assert min(lengths) >= 1
+        tokvecs = self.model[0].ops.flatten(tokvecs)
+        if USE_FINE_TUNE:
+            my_tokvecs, bp_my_tokvecs = self.model[0].begin_update(docs_tokvecs, drop=drop)
+            my_tokvecs = self.model[0].ops.flatten(my_tokvecs)
+            tokvecs += my_tokvecs
+        states = self.moves.init_batch(docs)
+        for gold in golds:
+            self.moves.preprocess_gold(gold)
+        cuda_stream = get_cuda_stream()
+        state2vec, vec2scores = self.get_batch_model(len(states), tokvecs, cuda_stream, 0.0)
+        states_d_scores, backprops = _beam_utils.update_beam(self.moves, self.nr_feature, 500,
+                                        states, tokvecs, golds,
+                                        state2vec, vec2scores,
+                                        width, density,
+                                        sgd=sgd, drop=drop, losses=losses)
+        backprop_lower = []
+        for i, d_scores in enumerate(states_d_scores):
+            if losses is not None:
+                losses[self.name] += (d_scores**2).sum()
+            ids, bp_vectors, bp_scores = backprops[i]
+            d_vector = bp_scores(d_scores, sgd=sgd)
+            if isinstance(self.model[0].ops, CupyOps) \
+            and not isinstance(ids, state2vec.ops.xp.ndarray):
+                backprop_lower.append((
+                    get_async(cuda_stream, ids),
+                    get_async(cuda_stream, d_vector),
+                    bp_vectors))
+            else:
+                backprop_lower.append((ids, d_vector, bp_vectors))
+        d_tokvecs = self.model[0].ops.allocate(tokvecs.shape)
+        self._make_updates(d_tokvecs, backprop_lower, sgd, cuda_stream)
+        d_tokvecs = self.model[0].ops.unflatten(d_tokvecs, lengths)
+        if USE_FINE_TUNE:
+            bp_my_tokvecs(d_tokvecs, sgd=sgd)
+        return d_tokvecs
+>>>>>>> feature/nn-beam-parser

     def _init_gold_batch(self, whole_docs, whole_golds):
         """Make a square batch, of length equal to the shortest doc. A long
@@ -585,6 +691,7 @@
         xp = get_array_module(d_tokvecs)
         for ids, d_vector, bp_vector in backprops:
             d_state_features = bp_vector(d_vector, sgd=sgd)
+<<<<<<< HEAD
             active_feats = ids * (ids >= 0)
             active_feats = active_feats.reshape((ids.shape[0], ids.shape[1], 1))
             if hasattr(xp, 'scatter_add'):
@@ -593,6 +700,12 @@
             else:
                 xp.add.at(d_tokvecs,
                     ids, d_state_features * active_feats)
+=======
+            mask = ids >= 0
+            d_state_features *= mask.reshape(ids.shape + (1,))
+            self.model[0].ops.scatter_add(d_tokvecs, ids * mask,
+                d_state_features)
+>>>>>>> feature/nn-beam-parser

     @property
     def move_names(self):
@@ -608,7 +721,7 @@
                                       lower, stream, drop=dropout)
         return state2vec, upper

-    nr_feature = 13
+    nr_feature = 8

     def get_token_ids(self, states):
         cdef StateClass state
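
With beam_width and beam_density now stored in cfg, update() interleaves the two training modes: when a beam is configured, roughly half of the batches go through update_beam() and the rest through the usual greedy update. A schematic sketch of that control flow, with hypothetical function arguments rather than the Parser API:

import random

def mixed_update(greedy_update, beam_update, cfg, batch):
    # Roughly half of the batches get a global (beam) update once a
    # beam width >= 2 is configured; otherwise fall back to greedy.
    if cfg.get('beam_width', 1) >= 2 and random.random() >= 0.5:
        return beam_update(batch, width=cfg['beam_width'],
                           density=cfg['beam_density'])
    return greedy_update(batch)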

View File

@@ -99,6 +99,9 @@ cdef class TransitionSystem:
     def preprocess_gold(self, GoldParse gold):
         raise NotImplementedError

+    def is_gold_parse(self, StateClass state, GoldParse gold):
+        raise NotImplementedError
+
     cdef Transition lookup_transition(self, object name) except *:
         raise NotImplementedError