mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 10:16:27 +03:00
Checkpoint -- nearly finished reimpl
This commit is contained in:
parent
4441866f55
commit
35458987e8
|
@ -28,6 +28,8 @@ from murmurhash.mrmr cimport hash64
|
||||||
from preshed.maps cimport MapStruct
|
from preshed.maps cimport MapStruct
|
||||||
from preshed.maps cimport map_get
|
from preshed.maps cimport map_get
|
||||||
|
|
||||||
|
from thinc.api import layerize
|
||||||
|
|
||||||
from numpy import exp
|
from numpy import exp
|
||||||
|
|
||||||
from . import _parse_features
|
from . import _parse_features
|
||||||
|
@ -55,40 +57,45 @@ def set_debug(val):
|
||||||
|
|
||||||
|
|
||||||
def get_greedy_model_for_batch(tokvecs, TransitionSystem moves, feat_maps, upper_model):
|
def get_greedy_model_for_batch(tokvecs, TransitionSystem moves, feat_maps, upper_model):
|
||||||
is_valid = model.ops.allocate((len(docs), system.n_moves), dtype='i')
|
cdef int[:, :] is_valid_
|
||||||
costs = model.ops.allocate((len(docs), system.n_moves), dtype='f')
|
cdef float[:, :] costs_
|
||||||
token_ids = model.ops.allocate((len(docs), StateClass.nr_context_tokens()),
|
cdef int[:, :] token_ids
|
||||||
dtype='uint64')
|
is_valid = upper_model.ops.allocate((len(tokvecs), moves.n_moves), dtype='i')
|
||||||
cached, backprops = zip(*[lyr.begin_update(tokvecs) for lyr in feat_maps)
|
costs = upper_model.ops.allocate((len(tokvecs), moves.n_moves), dtype='f')
|
||||||
|
token_ids = upper_model.ops.allocate((len(tokvecs), StateClass.nr_context_tokens()),
|
||||||
|
dtype='uint64')
|
||||||
|
cached, backprops = zip(*[lyr.begin_update(tokvecs) for lyr in feat_maps])
|
||||||
|
is_valid_ = is_valid
|
||||||
|
costs_ = costs
|
||||||
|
|
||||||
def forward(states, drop=0.):
|
def forward(states, drop=0.):
|
||||||
nonlocal is_valid, costs, token_ids, features
|
nonlocal is_valid, costs, token_ids, moves
|
||||||
is_valid = is_valid[:len(states)]
|
is_valid = is_valid[:len(states)]
|
||||||
costs = costs[:len(states)]
|
costs = costs[:len(states)]
|
||||||
token_ids = token_ids[:len(states)]
|
token_ids = token_ids[:len(states)]
|
||||||
is_valid = is_valid[:len(states)]
|
is_valid = is_valid[:len(states)]
|
||||||
for state in states:
|
cdef StateClass state
|
||||||
state.set_context_tokens(&token_ids[i])
|
for i, state in enumerate(states):
|
||||||
moves.set_valid(&is_valid[i], state.c)
|
state.set_context_tokens(token_ids[i])
|
||||||
|
moves.set_valid(&is_valid_[i, 0], state.c)
|
||||||
|
|
||||||
features = cached[token_ids].sum(axis=1)
|
features = cached[token_ids].sum(axis=1)
|
||||||
|
|
||||||
scores, bp_scores = upper_model.begin_update(features, drop=drop)
|
scores, bp_scores = upper_model.begin_update(features, drop=drop)
|
||||||
softmaxed = model.ops.softmax(scores)
|
softmaxed = upper_model.ops.softmax(scores)
|
||||||
# Renormalize for invalid actions
|
# Renormalize for invalid actions
|
||||||
softmaxed *= is_valid
|
softmaxed *= is_valid
|
||||||
softmaxed /= softmaxed.sum(axis=1).reshape((softmaxed.shape[0], 1))
|
softmaxed /= softmaxed.sum(axis=1).reshape((softmaxed.shape[0], 1))
|
||||||
|
|
||||||
def backward(golds, sgd=None):
|
def backward(golds, sgd=None):
|
||||||
nonlocal costs_, is_valid_, moves_
|
nonlocal costs_, is_valid_, moves
|
||||||
cdef TransitionSystem moves = moves_
|
|
||||||
cdef int[:, :] is_valid
|
|
||||||
cdef float[:, :] costs
|
|
||||||
for i, (state, gold) in enumerate(zip(states, golds)):
|
for i, (state, gold) in enumerate(zip(states, golds)):
|
||||||
moves.set_costs(&costs[i], &is_valid[i],
|
moves.set_costs(&is_valid_[i, 0], &costs_[i, 0],
|
||||||
state, gold)
|
state, gold)
|
||||||
set_log_loss(model.ops, d_scores,
|
d_scores = scores.copy()
|
||||||
scores, is_valid, costs)
|
d_scores.fill(0)
|
||||||
|
set_log_loss(upper_model.ops, d_scores,
|
||||||
|
scores, is_valid_, costs_)
|
||||||
d_tokens = bp_scores(d_scores, sgd)
|
d_tokens = bp_scores(d_scores, sgd)
|
||||||
return d_tokens
|
return d_tokens
|
||||||
|
|
||||||
|
@ -119,6 +126,17 @@ def transition_batch(TransitionSystem moves, states, scores):
|
||||||
action.do(state.c, action.label)
|
action.do(state.c, action.label)
|
||||||
|
|
||||||
|
|
||||||
|
def init_states(TransitionSystem moves, docs):
|
||||||
|
states = []
|
||||||
|
cdef Doc doc
|
||||||
|
cdef StateClass state
|
||||||
|
for i, doc in enumerate(docs):
|
||||||
|
state = StateClass.init(doc.c, doc.length)
|
||||||
|
moves.initialize_state(state.c)
|
||||||
|
states.append(state)
|
||||||
|
return states
|
||||||
|
|
||||||
|
|
||||||
cdef class Parser:
|
cdef class Parser:
|
||||||
"""
|
"""
|
||||||
Base class of the DependencyParser and EntityRecognizer.
|
Base class of the DependencyParser and EntityRecognizer.
|
||||||
|
@ -176,7 +194,8 @@ cdef class Parser:
|
||||||
def build_model(self, width=32, nr_vector=1000, nF=1, nB=1, nS=1, nL=1, nR=1, **_):
|
def build_model(self, width=32, nr_vector=1000, nF=1, nB=1, nS=1, nL=1, nR=1, **_):
|
||||||
nr_context_tokens = StateClass.nr_context_tokens(nF, nB, nS, nL, nR)
|
nr_context_tokens = StateClass.nr_context_tokens(nF, nB, nS, nL, nR)
|
||||||
self.model = build_model(width*2, 2, self.moves.n_moves)
|
self.model = build_model(width*2, 2, self.moves.n_moves)
|
||||||
self.feature_maps = build_feature_maps(nr_context_tokens, width, nr_vector))
|
# TODO
|
||||||
|
self.feature_maps = [] #build_feature_maps(nr_context_tokens, width, nr_vector)
|
||||||
|
|
||||||
def __call__(self, Doc tokens):
|
def __call__(self, Doc tokens):
|
||||||
"""
|
"""
|
||||||
|
@ -248,6 +267,7 @@ cdef class Parser:
|
||||||
|
|
||||||
model = get_greedy_model_for_batch([d.tensor for d in docs],
|
model = get_greedy_model_for_batch([d.tensor for d in docs],
|
||||||
self.moves, self.model, self.feat_maps)
|
self.moves, self.model, self.feat_maps)
|
||||||
|
states = init_states(self.moves, docs)
|
||||||
|
|
||||||
d_tokens = [self.model.ops.allocate(d.tensor.shape) for d in docs]
|
d_tokens = [self.model.ops.allocate(d.tensor.shape) for d in docs]
|
||||||
output = list(d_tokens)
|
output = list(d_tokens)
|
||||||
|
@ -261,7 +281,7 @@ cdef class Parser:
|
||||||
transition_batch(self.moves, states)
|
transition_batch(self.moves, states)
|
||||||
# Get unfinished states (and their matching gold and token gradients)
|
# Get unfinished states (and their matching gold and token gradients)
|
||||||
todo = filter(lambda sp: not sp[0].py_is_final(), todo)
|
todo = filter(lambda sp: not sp[0].py_is_final(), todo)
|
||||||
return output, sum(losses)
|
return output
|
||||||
|
|
||||||
def begin_training(self, docs, golds):
|
def begin_training(self, docs, golds):
|
||||||
for gold in golds:
|
for gold in golds:
|
||||||
|
@ -336,31 +356,6 @@ def _begin_update(self, model, states, tokvecs, drop=0.):
|
||||||
return finish_update(d_scores, sgd=sgd)
|
return finish_update(d_scores, sgd=sgd)
|
||||||
return softmaxed, backward
|
return softmaxed, backward
|
||||||
|
|
||||||
def _init_states(self, docs):
|
|
||||||
states = []
|
|
||||||
cdef Doc doc
|
|
||||||
cdef StateClass state
|
|
||||||
for i, doc in enumerate(docs):
|
|
||||||
state = StateClass.init(doc.c, doc.length)
|
|
||||||
self.moves.initialize_state(state.c)
|
|
||||||
states.append(state)
|
|
||||||
return states
|
|
||||||
|
|
||||||
def _validate_batch(self, int[:, ::1] is_valid, states):
|
|
||||||
cdef StateClass state
|
|
||||||
cdef int i
|
|
||||||
for i, state in enumerate(states):
|
|
||||||
self.moves.set_valid(&is_valid[i, 0], state.c)
|
|
||||||
|
|
||||||
def _cost_batch(self, weight_t[:, ::1] costs, int[:, ::1] is_valid,
|
|
||||||
states, golds):
|
|
||||||
cdef int i
|
|
||||||
cdef StateClass state
|
|
||||||
cdef GoldParse gold
|
|
||||||
for i, (state, gold) in enumerate(zip(states, golds)):
|
|
||||||
self.moves.set_costs(&is_valid[i, 0], &costs[i, 0], state, gold)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def _get_features(self, states, all_tokvecs, attr_names,
|
def _get_features(self, states, all_tokvecs, attr_names,
|
||||||
nF=1, nB=0, nS=2, nL=2, nR=2):
|
nF=1, nB=0, nS=2, nL=2, nR=2):
|
||||||
|
|
Loading…
Reference in New Issue
Block a user