mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-25 00:34:20 +03:00
Merge branch 'develop' of https://github.com/explosion/spaCy into develop
This commit is contained in:
commit
df2745eb08
|
@ -239,7 +239,7 @@ def Tok2Vec(width, embed_size, preprocess=None):
|
|||
>> uniqued(embed, column=5)
|
||||
>> drop_layer(
|
||||
Residual(
|
||||
(ExtractWindow(nW=1) >> BN(Maxout(width, width*3)))
|
||||
(ExtractWindow(nW=1) >> ReLu(width, width*3))
|
||||
)
|
||||
) ** 4, pad=4
|
||||
)
|
||||
|
|
|
@ -232,7 +232,10 @@ for verb_data in [
|
|||
{ORTH: "are", LEMMA: "be", NORM: "are", TAG: "VBP", "number": 2},
|
||||
{ORTH: "is", LEMMA: "be", NORM: "is", TAG: "VBZ"},
|
||||
{ORTH: "was", LEMMA: "be", NORM: "was"},
|
||||
{ORTH: "were", LEMMA: "be", NORM: "were"}]:
|
||||
{ORTH: "were", LEMMA: "be", NORM: "were"},
|
||||
{ORTH: "have", NORM: "have"},
|
||||
{ORTH: "has", LEMMA: "have", NORM: "has"},
|
||||
{ORTH: "dare", NORM: "dare"}]:
|
||||
verb_data_tc = dict(verb_data)
|
||||
verb_data_tc[ORTH] = verb_data_tc[ORTH].title()
|
||||
for data in [verb_data, verb_data_tc]:
|
||||
|
|
|
@ -14,4 +14,8 @@ cdef class Parser:
|
|||
cdef readonly TransitionSystem moves
|
||||
cdef readonly object cfg
|
||||
|
||||
cdef void _parse_step(self, StateC* state,
|
||||
const float* feat_weights,
|
||||
int nr_class, int nr_feat, int nr_piece) nogil
|
||||
|
||||
#cdef int parseC(self, TokenC* tokens, int length, int nr_feat) nogil
|
||||
|
|
|
@ -257,10 +257,15 @@ cdef class Parser:
|
|||
nI=token_vector_width)
|
||||
|
||||
with Model.use_device('cpu'):
|
||||
upper = chain(
|
||||
clone(Maxout(hidden_width), (depth-1)),
|
||||
zero_init(Affine(nr_class, drop_factor=0.0))
|
||||
)
|
||||
if depth == 0:
|
||||
upper = chain()
|
||||
upper.is_noop = True
|
||||
else:
|
||||
upper = chain(
|
||||
clone(Maxout(hidden_width), (depth-1)),
|
||||
zero_init(Affine(nr_class, drop_factor=0.0))
|
||||
)
|
||||
upper.is_noop = False
|
||||
# TODO: This is an unfortunate hack atm!
|
||||
# Used to set input dimensions in network.
|
||||
lower.begin_training(lower.ops.allocate((500, token_vector_width)))
|
||||
|
@ -412,20 +417,27 @@ cdef class Parser:
|
|||
cdef np.ndarray scores
|
||||
c_token_ids = <int*>token_ids.data
|
||||
c_is_valid = <int*>is_valid.data
|
||||
cdef int has_hidden = not getattr(vec2scores, 'is_noop', False)
|
||||
while not next_step.empty():
|
||||
for i in range(next_step.size()):
|
||||
st = next_step[i]
|
||||
st.set_context_tokens(&c_token_ids[i*nr_feat], nr_feat)
|
||||
self.moves.set_valid(&c_is_valid[i*nr_class], st)
|
||||
vectors = state2vec(token_ids[:next_step.size()])
|
||||
scores = vec2scores(vectors)
|
||||
c_scores = <float*>scores.data
|
||||
for i in range(next_step.size()):
|
||||
st = next_step[i]
|
||||
guess = arg_max_if_valid(
|
||||
&c_scores[i*nr_class], &c_is_valid[i*nr_class], nr_class)
|
||||
action = self.moves.c[guess]
|
||||
action.do(st, action.label)
|
||||
if not has_hidden:
|
||||
for i in cython.parallel.prange(
|
||||
next_step.size(), num_threads=6, nogil=True):
|
||||
self._parse_step(next_step[i],
|
||||
feat_weights, nr_class, nr_feat, nr_piece)
|
||||
else:
|
||||
for i in range(next_step.size()):
|
||||
st = next_step[i]
|
||||
st.set_context_tokens(&c_token_ids[i*nr_feat], nr_feat)
|
||||
self.moves.set_valid(&c_is_valid[i*nr_class], st)
|
||||
vectors = state2vec(token_ids[:next_step.size()])
|
||||
scores = vec2scores(vectors)
|
||||
c_scores = <float*>scores.data
|
||||
for i in range(next_step.size()):
|
||||
st = next_step[i]
|
||||
guess = arg_max_if_valid(
|
||||
&c_scores[i*nr_class], &c_is_valid[i*nr_class], nr_class)
|
||||
action = self.moves.c[guess]
|
||||
action.do(st, action.label)
|
||||
this_step, next_step = next_step, this_step
|
||||
next_step.clear()
|
||||
for st in this_step:
|
||||
|
@ -482,6 +494,28 @@ cdef class Parser:
|
|||
beams.append(beam)
|
||||
return beams
|
||||
|
||||
cdef void _parse_step(self, StateC* state,
|
||||
const float* feat_weights,
|
||||
int nr_class, int nr_feat, int nr_piece) nogil:
|
||||
'''This only works with no hidden layers -- fast but inaccurate'''
|
||||
# Advances a single parse state by one transition: gathers the state's
# feature ids, scores them against feat_weights, picks an action and
# applies it. Declared nogil and uses only C-level buffers.
#for i in cython.parallel.prange(next_step.size(), num_threads=4, nogil=True):
|
||||
#     self._parse_step(next_step[i], feat_weights, nr_class, nr_feat)
|
||||
# Per-call scratch buffers, zero-initialised by calloc:
#   token_ids: nr_feat ints
#   scores:    nr_class * nr_piece floats
#   is_valid:  nr_class ints
# NOTE(review): the calloc results are not checked for NULL before use.
token_ids = <int*>calloc(nr_feat, sizeof(int))
|
||||
scores = <float*>calloc(nr_class * nr_piece, sizeof(float))
|
||||
is_valid = <int*>calloc(nr_class, sizeof(int))
|
||||
|
||||
# Let the state write its nr_feat context entries into token_ids
# (presumably token indices used as features -- confirm against StateC).
state.set_context_tokens(token_ids, nr_feat)
|
||||
# Fill scores from feat_weights for the ids in token_ids. The literal 1 is
# presumably the batch size (one state) -- confirm against
# sum_state_features, which is defined outside this block.
sum_state_features(scores,
|
||||
feat_weights, token_ids, 1, nr_feat, nr_class * nr_piece)
|
||||
# Have the transition system fill the nr_class entries of is_valid for
# this state.
self.moves.set_valid(is_valid, state)
|
||||
# Choose an action index from scores and is_valid. Presumably a maxout over
# the nr_piece pieces per class, restricted to valid classes -- confirm
# against arg_maxout_if_valid.
guess = arg_maxout_if_valid(scores, is_valid, nr_class, nr_piece)
|
||||
# Look up the chosen transition and apply it to the state.
action = self.moves.c[guess]
|
||||
action.do(state, action.label)
|
||||
|
||||
# Release the scratch buffers allocated at the top of the function.
free(is_valid)
|
||||
free(scores)
|
||||
free(token_ids)
|
||||
|
||||
def update(self, docs_tokvecs, golds, drop=0., sgd=None, losses=None):
|
||||
if not any(self.moves.has_gold(gold) for gold in golds):
|
||||
return None
|
||||
|
|
Loading…
Reference in New Issue
Block a user