mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-24 17:06:29 +03:00
Revert "WIP on improving parser efficiency"
This reverts commit bdaac7ab44
.
This commit is contained in:
parent
bdaac7ab44
commit
532afef4a8
|
@ -9,7 +9,6 @@ from pathlib import Path
|
||||||
import dill
|
import dill
|
||||||
import tqdm
|
import tqdm
|
||||||
from thinc.neural.optimizers import linear_decay
|
from thinc.neural.optimizers import linear_decay
|
||||||
from timeit import default_timer as timer
|
|
||||||
|
|
||||||
from ..tokens.doc import Doc
|
from ..tokens.doc import Doc
|
||||||
from ..scorer import Scorer
|
from ..scorer import Scorer
|
||||||
|
@ -82,13 +81,8 @@ def train(_, lang, output_dir, train_data, dev_data, n_iter=20, n_sents=0,
|
||||||
batch_size = min(batch_size, max_batch_size)
|
batch_size = min(batch_size, max_batch_size)
|
||||||
dropout = linear_decay(orig_dropout, dropout_decay, i*n_train_docs+idx)
|
dropout = linear_decay(orig_dropout, dropout_decay, i*n_train_docs+idx)
|
||||||
with nlp.use_params(optimizer.averages):
|
with nlp.use_params(optimizer.averages):
|
||||||
start = timer()
|
|
||||||
scorer = nlp.evaluate(corpus.dev_docs(nlp))
|
scorer = nlp.evaluate(corpus.dev_docs(nlp))
|
||||||
end = timer()
|
print_progress(i, {}, scorer.scores)
|
||||||
n_words = scorer.tokens.tp + scorer.tokens.fn
|
|
||||||
assert n_words != 0
|
|
||||||
wps = n_words / (end-start)
|
|
||||||
print_progress(i, {}, scorer.scores, wps=wps)
|
|
||||||
with (output_path / 'model.bin').open('wb') as file_:
|
with (output_path / 'model.bin').open('wb') as file_:
|
||||||
with nlp.use_params(optimizer.averages):
|
with nlp.use_params(optimizer.averages):
|
||||||
dill.dump(nlp, file_, -1)
|
dill.dump(nlp, file_, -1)
|
||||||
|
@ -104,14 +98,14 @@ def _render_parses(i, to_render):
|
||||||
file_.write(html)
|
file_.write(html)
|
||||||
|
|
||||||
|
|
||||||
def print_progress(itn, losses, dev_scores, wps=0.0):
|
def print_progress(itn, losses, dev_scores):
|
||||||
|
# TODO: Fix!
|
||||||
scores = {}
|
scores = {}
|
||||||
for col in ['dep_loss', 'tag_loss', 'uas', 'tags_acc', 'token_acc',
|
for col in ['dep_loss', 'tag_loss', 'uas', 'tags_acc', 'token_acc',
|
||||||
'ents_p', 'ents_r', 'ents_f', 'wps']:
|
'ents_p', 'ents_r', 'ents_f']:
|
||||||
scores[col] = 0.0
|
scores[col] = 0.0
|
||||||
scores.update(losses)
|
scores.update(losses)
|
||||||
scores.update(dev_scores)
|
scores.update(dev_scores)
|
||||||
scores[wps] = wps
|
|
||||||
tpl = '\t'.join((
|
tpl = '\t'.join((
|
||||||
'{:d}',
|
'{:d}',
|
||||||
'{dep_loss:.3f}',
|
'{dep_loss:.3f}',
|
||||||
|
@ -121,8 +115,7 @@ def print_progress(itn, losses, dev_scores, wps=0.0):
|
||||||
'{ents_r:.3f}',
|
'{ents_r:.3f}',
|
||||||
'{ents_f:.3f}',
|
'{ents_f:.3f}',
|
||||||
'{tags_acc:.3f}',
|
'{tags_acc:.3f}',
|
||||||
'{token_acc:.3f}',
|
'{token_acc:.3f}'))
|
||||||
'{wps:.1f}'))
|
|
||||||
print(tpl.format(itn, **scores))
|
print(tpl.format(itn, **scores))
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -144,7 +144,7 @@ def _min_edit_path(cand_words, gold_words):
|
||||||
class GoldCorpus(object):
|
class GoldCorpus(object):
|
||||||
"""An annotated corpus, using the JSON file format. Manages
|
"""An annotated corpus, using the JSON file format. Manages
|
||||||
annotations for tagging, dependency parsing and NER."""
|
annotations for tagging, dependency parsing and NER."""
|
||||||
def __init__(self, train_path, dev_path, gold_preproc=True, limit=None):
|
def __init__(self, train_path, dev_path, limit=None):
|
||||||
"""Create a GoldCorpus.
|
"""Create a GoldCorpus.
|
||||||
|
|
||||||
train_path (unicode or Path): File or directory of training data.
|
train_path (unicode or Path): File or directory of training data.
|
||||||
|
@ -184,7 +184,7 @@ class GoldCorpus(object):
|
||||||
n += 1
|
n += 1
|
||||||
return n
|
return n
|
||||||
|
|
||||||
def train_docs(self, nlp, shuffle=0, gold_preproc=False,
|
def train_docs(self, nlp, shuffle=0, gold_preproc=True,
|
||||||
projectivize=False):
|
projectivize=False):
|
||||||
train_tuples = self.train_tuples
|
train_tuples = self.train_tuples
|
||||||
if projectivize:
|
if projectivize:
|
||||||
|
@ -195,7 +195,7 @@ class GoldCorpus(object):
|
||||||
gold_docs = self.iter_gold_docs(nlp, train_tuples, gold_preproc)
|
gold_docs = self.iter_gold_docs(nlp, train_tuples, gold_preproc)
|
||||||
yield from gold_docs
|
yield from gold_docs
|
||||||
|
|
||||||
def dev_docs(self, nlp, gold_preproc=False):
|
def dev_docs(self, nlp, gold_preproc=True):
|
||||||
gold_docs = self.iter_gold_docs(nlp, self.dev_tuples, gold_preproc)
|
gold_docs = self.iter_gold_docs(nlp, self.dev_tuples, gold_preproc)
|
||||||
gold_docs = nlp.preprocess_gold(gold_docs)
|
gold_docs = nlp.preprocess_gold(gold_docs)
|
||||||
yield from gold_docs
|
yield from gold_docs
|
||||||
|
@ -203,11 +203,6 @@ class GoldCorpus(object):
|
||||||
@classmethod
|
@classmethod
|
||||||
def iter_gold_docs(cls, nlp, tuples, gold_preproc):
|
def iter_gold_docs(cls, nlp, tuples, gold_preproc):
|
||||||
for raw_text, paragraph_tuples in tuples:
|
for raw_text, paragraph_tuples in tuples:
|
||||||
if gold_preproc:
|
|
||||||
raw_text = None
|
|
||||||
else:
|
|
||||||
paragraph_tuples = merge_sents(paragraph_tuples)
|
|
||||||
|
|
||||||
docs = cls._make_docs(nlp, raw_text, paragraph_tuples,
|
docs = cls._make_docs(nlp, raw_text, paragraph_tuples,
|
||||||
gold_preproc)
|
gold_preproc)
|
||||||
golds = cls._make_golds(docs, paragraph_tuples)
|
golds = cls._make_golds(docs, paragraph_tuples)
|
||||||
|
@ -216,11 +211,15 @@ class GoldCorpus(object):
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _make_docs(cls, nlp, raw_text, paragraph_tuples, gold_preproc):
|
def _make_docs(cls, nlp, raw_text, paragraph_tuples, gold_preproc):
|
||||||
if raw_text is not None:
|
if gold_preproc:
|
||||||
return [nlp.make_doc(raw_text)]
|
|
||||||
else:
|
|
||||||
return [Doc(nlp.vocab, words=sent_tuples[0][1])
|
return [Doc(nlp.vocab, words=sent_tuples[0][1])
|
||||||
for sent_tuples in paragraph_tuples]
|
for sent_tuples in paragraph_tuples]
|
||||||
|
elif raw_text is not None:
|
||||||
|
return [nlp.make_doc(raw_text)]
|
||||||
|
else:
|
||||||
|
docs = [Doc(nlp.vocab, words=sent_tuples[0][1])
|
||||||
|
for sent_tuples in paragraph_tuples]
|
||||||
|
return merge_sents(docs)
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _make_golds(cls, docs, paragraph_tuples):
|
def _make_golds(cls, docs, paragraph_tuples):
|
||||||
|
|
|
@ -334,7 +334,7 @@ class Language(object):
|
||||||
>>> for doc in nlp.pipe(texts, batch_size=50, n_threads=4):
|
>>> for doc in nlp.pipe(texts, batch_size=50, n_threads=4):
|
||||||
>>> assert doc.is_parsed
|
>>> assert doc.is_parsed
|
||||||
"""
|
"""
|
||||||
docs = (self.make_doc(text) for text in texts)
|
#docs = (self.make_doc(text) for text in texts)
|
||||||
docs = texts
|
docs = texts
|
||||||
for proc in self.pipeline:
|
for proc in self.pipeline:
|
||||||
name = getattr(proc, 'name', None)
|
name = getattr(proc, 'name', None)
|
||||||
|
|
|
@ -215,7 +215,7 @@ cdef class Matcher:
|
||||||
"""
|
"""
|
||||||
return len(self._patterns)
|
return len(self._patterns)
|
||||||
|
|
||||||
def add(self, key, *patterns, **kwargs):
|
def add(self, key, on_match, *patterns):
|
||||||
"""Add a match-rule to the matcher.
|
"""Add a match-rule to the matcher.
|
||||||
A match-rule consists of: an ID key, an on_match callback, and one or
|
A match-rule consists of: an ID key, an on_match callback, and one or
|
||||||
more patterns. If the key exists, the patterns are appended to the
|
more patterns. If the key exists, the patterns are appended to the
|
||||||
|
@ -227,7 +227,6 @@ cdef class Matcher:
|
||||||
descriptors can also include quantifiers. There are currently important
|
descriptors can also include quantifiers. There are currently important
|
||||||
known problems with the quantifiers – see the docs.
|
known problems with the quantifiers – see the docs.
|
||||||
"""
|
"""
|
||||||
on_match = kwargs.get('on_match', None)
|
|
||||||
for pattern in patterns:
|
for pattern in patterns:
|
||||||
if len(pattern) == 0:
|
if len(pattern) == 0:
|
||||||
msg = ("Cannot add pattern for zero tokens to matcher.\n"
|
msg = ("Cannot add pattern for zero tokens to matcher.\n"
|
||||||
|
|
|
@ -167,7 +167,7 @@ class NeuralTagger(object):
|
||||||
self.model = model
|
self.model = model
|
||||||
|
|
||||||
def __call__(self, doc):
|
def __call__(self, doc):
|
||||||
tags = self.predict([doc.tensor])
|
tags = self.predict(doc.tensor)
|
||||||
self.set_annotations([doc], tags)
|
self.set_annotations([doc], tags)
|
||||||
|
|
||||||
def pipe(self, stream, batch_size=128, n_threads=-1):
|
def pipe(self, stream, batch_size=128, n_threads=-1):
|
||||||
|
@ -340,6 +340,24 @@ cdef class NeuralEntityRecognizer(NeuralParser):
|
||||||
|
|
||||||
nr_feature = 6
|
nr_feature = 6
|
||||||
|
|
||||||
|
def get_token_ids(self, states):
|
||||||
|
cdef StateClass state
|
||||||
|
cdef int n_tokens = 6
|
||||||
|
ids = numpy.zeros((len(states), n_tokens), dtype='i', order='c')
|
||||||
|
for i, state in enumerate(states):
|
||||||
|
ids[i, 0] = state.c.B(0)-1
|
||||||
|
ids[i, 1] = state.c.B(0)
|
||||||
|
ids[i, 2] = state.c.B(1)
|
||||||
|
ids[i, 3] = state.c.E(0)
|
||||||
|
ids[i, 4] = state.c.E(0)-1
|
||||||
|
ids[i, 5] = state.c.E(0)+1
|
||||||
|
for j in range(6):
|
||||||
|
if ids[i, j] >= state.c.length:
|
||||||
|
ids[i, j] = -1
|
||||||
|
if ids[i, j] >= 0:
|
||||||
|
ids[i, j] += state.c.offset
|
||||||
|
return ids
|
||||||
|
|
||||||
|
|
||||||
cdef class BeamDependencyParser(BeamParser):
|
cdef class BeamDependencyParser(BeamParser):
|
||||||
TransitionSystem = ArcEager
|
TransitionSystem = ArcEager
|
||||||
|
|
|
@ -15,7 +15,7 @@ cdef class Parser:
|
||||||
cdef readonly object cfg
|
cdef readonly object cfg
|
||||||
|
|
||||||
cdef void _parse_step(self, StateC* state,
|
cdef void _parse_step(self, StateC* state,
|
||||||
int* token_ids, float* scores, int* is_valid,
|
const float* feat_weights,
|
||||||
const float* feat_weights, int nr_class, int nr_feat) nogil
|
int nr_class, int nr_feat) nogil
|
||||||
|
|
||||||
#cdef int parseC(self, TokenC* tokens, int length, int nr_feat) nogil
|
#cdef int parseC(self, TokenC* tokens, int length, int nr_feat) nogil
|
||||||
|
|
|
@ -19,7 +19,6 @@ import numpy.random
|
||||||
cimport numpy as np
|
cimport numpy as np
|
||||||
|
|
||||||
from libcpp.vector cimport vector
|
from libcpp.vector cimport vector
|
||||||
from libcpp.pair cimport pair
|
|
||||||
from cpython.ref cimport PyObject, Py_INCREF, Py_XDECREF
|
from cpython.ref cimport PyObject, Py_INCREF, Py_XDECREF
|
||||||
from cpython.exc cimport PyErr_CheckSignals
|
from cpython.exc cimport PyErr_CheckSignals
|
||||||
from libc.stdint cimport uint32_t, uint64_t
|
from libc.stdint cimport uint32_t, uint64_t
|
||||||
|
@ -69,9 +68,6 @@ def set_debug(val):
|
||||||
DEBUG = val
|
DEBUG = val
|
||||||
|
|
||||||
|
|
||||||
ctypedef pair[int, StateC*] step_t
|
|
||||||
|
|
||||||
|
|
||||||
cdef class precompute_hiddens:
|
cdef class precompute_hiddens:
|
||||||
'''Allow a model to be "primed" by pre-computing input features in bulk.
|
'''Allow a model to be "primed" by pre-computing input features in bulk.
|
||||||
|
|
||||||
|
@ -123,9 +119,6 @@ cdef class precompute_hiddens:
|
||||||
self._is_synchronized = True
|
self._is_synchronized = True
|
||||||
return <float*>self._cached.data
|
return <float*>self._cached.data
|
||||||
|
|
||||||
def get_bp_hiddens(self):
|
|
||||||
return self._bp_hiddens
|
|
||||||
|
|
||||||
def __call__(self, X):
|
def __call__(self, X):
|
||||||
return self.begin_update(X)[0]
|
return self.begin_update(X)[0]
|
||||||
|
|
||||||
|
@ -315,6 +308,7 @@ cdef class Parser:
|
||||||
cdef:
|
cdef:
|
||||||
precompute_hiddens state2vec
|
precompute_hiddens state2vec
|
||||||
StateClass state
|
StateClass state
|
||||||
|
Pool mem
|
||||||
const float* feat_weights
|
const float* feat_weights
|
||||||
StateC* st
|
StateC* st
|
||||||
vector[StateC*] next_step, this_step
|
vector[StateC*] next_step, this_step
|
||||||
|
@ -342,14 +336,7 @@ cdef class Parser:
|
||||||
cdef int i
|
cdef int i
|
||||||
while not next_step.empty():
|
while not next_step.empty():
|
||||||
for i in cython.parallel.prange(next_step.size(), num_threads=4, nogil=True):
|
for i in cython.parallel.prange(next_step.size(), num_threads=4, nogil=True):
|
||||||
token_ids = <int*>calloc(nr_feat, sizeof(int))
|
self._parse_step(next_step[i], feat_weights, nr_class, nr_feat)
|
||||||
scores = <float*>calloc(nr_class, sizeof(float))
|
|
||||||
is_valid = <int*>calloc(nr_class, sizeof(int))
|
|
||||||
self._parse_step(next_step[i], token_ids, scores, is_valid,
|
|
||||||
feat_weights, nr_class, nr_feat)
|
|
||||||
free(is_valid)
|
|
||||||
free(scores)
|
|
||||||
free(token_ids)
|
|
||||||
this_step, next_step = next_step, this_step
|
this_step, next_step = next_step, this_step
|
||||||
next_step.clear()
|
next_step.clear()
|
||||||
for st in this_step:
|
for st in this_step:
|
||||||
|
@ -358,8 +345,12 @@ cdef class Parser:
|
||||||
return states
|
return states
|
||||||
|
|
||||||
cdef void _parse_step(self, StateC* state,
|
cdef void _parse_step(self, StateC* state,
|
||||||
int* token_ids, float* scores, int* is_valid,
|
const float* feat_weights,
|
||||||
const float* feat_weights, int nr_class, int nr_feat) nogil:
|
int nr_class, int nr_feat) nogil:
|
||||||
|
token_ids = <int*>calloc(nr_feat, sizeof(int))
|
||||||
|
scores = <float*>calloc(nr_class, sizeof(float))
|
||||||
|
is_valid = <int*>calloc(nr_class, sizeof(int))
|
||||||
|
|
||||||
state.set_context_tokens(token_ids, nr_feat)
|
state.set_context_tokens(token_ids, nr_feat)
|
||||||
sum_state_features(scores,
|
sum_state_features(scores,
|
||||||
feat_weights, token_ids, 1, nr_feat, nr_class)
|
feat_weights, token_ids, 1, nr_feat, nr_class)
|
||||||
|
@ -368,90 +359,66 @@ cdef class Parser:
|
||||||
action = self.moves.c[guess]
|
action = self.moves.c[guess]
|
||||||
action.do(state, action.label)
|
action.do(state, action.label)
|
||||||
|
|
||||||
def update(self, docs_tokvecs, golds, drop=0., sgd=None):
|
free(is_valid)
|
||||||
cdef:
|
free(scores)
|
||||||
precompute_hiddens state2vec
|
free(token_ids)
|
||||||
StateClass state
|
|
||||||
const float* feat_weights
|
|
||||||
StateC* st
|
|
||||||
vector[step_t] next_step, this_step
|
|
||||||
cdef int[:, ::1] is_valid, token_ids
|
|
||||||
cdef float[:, ::1] scores, d_scores, costs
|
|
||||||
int nr_state, nr_feat, nr_class
|
|
||||||
|
|
||||||
|
def update(self, docs_tokvecs, golds, drop=0., sgd=None):
|
||||||
docs, tokvec_lists = docs_tokvecs
|
docs, tokvec_lists = docs_tokvecs
|
||||||
|
tokvecs = self.model[0].ops.flatten(tokvec_lists)
|
||||||
if isinstance(docs, Doc) and isinstance(golds, GoldParse):
|
if isinstance(docs, Doc) and isinstance(golds, GoldParse):
|
||||||
docs = [docs]
|
docs = [docs]
|
||||||
golds = [golds]
|
golds = [golds]
|
||||||
assert len(docs) == len(golds) == len(tokvec_lists)
|
|
||||||
|
|
||||||
nr_state = len(docs)
|
|
||||||
nr_feat = self.nr_feature
|
|
||||||
nr_class = self.moves.n_moves
|
|
||||||
|
|
||||||
token_ids = numpy.zeros((nr_state, nr_feat), dtype='i')
|
|
||||||
is_valid = numpy.zeros((nr_state, nr_class), dtype='i')
|
|
||||||
scores = numpy.zeros((nr_state, nr_class), dtype='f')
|
|
||||||
d_scores = numpy.zeros((nr_state, nr_class), dtype='f')
|
|
||||||
costs = numpy.zeros((nr_state, nr_class), dtype='f')
|
|
||||||
|
|
||||||
tokvecs = self.model[0].ops.flatten(tokvec_lists)
|
|
||||||
cuda_stream = get_cuda_stream()
|
cuda_stream = get_cuda_stream()
|
||||||
state2vec, vec2scores = self.get_batch_model(nr_state, tokvecs,
|
|
||||||
cuda_stream, drop)
|
|
||||||
|
|
||||||
golds = [self.moves.preprocess_gold(g) for g in golds]
|
golds = [self.moves.preprocess_gold(g) for g in golds]
|
||||||
|
|
||||||
states = self.moves.init_batch(docs)
|
states = self.moves.init_batch(docs)
|
||||||
cdef step_t step
|
state2vec, vec2scores = self.get_batch_model(len(states), tokvecs, cuda_stream,
|
||||||
cdef int i
|
drop)
|
||||||
for i, state in enumerate(states):
|
|
||||||
if not state.c.is_final():
|
todo = [(s, g) for (s, g) in zip(states, golds)
|
||||||
step.first = i
|
if not s.is_final() and g is not None]
|
||||||
step.second = state.c
|
|
||||||
next_step.push_back(step)
|
|
||||||
self.moves.set_costs(&is_valid[i, 0], &costs[i, 0], state, golds[i])
|
|
||||||
|
|
||||||
feat_weights = state2vec.get_feat_weights()
|
|
||||||
bp_hiddens = state2vec.get_bp_hiddens()
|
|
||||||
d_tokvecs = self.model[0].ops.allocate(tokvecs.shape)
|
|
||||||
backprops = []
|
backprops = []
|
||||||
|
cdef float loss = 0.
|
||||||
|
while len(todo) >= 3:
|
||||||
|
states, golds = zip(*todo)
|
||||||
|
|
||||||
while next_step.size():
|
token_ids = self.get_token_ids(states)
|
||||||
# Allocate these each step, so copy an be async
|
vector, bp_vector = state2vec.begin_update(token_ids, drop=drop)
|
||||||
np_token_ids = numpy.zeros((nr_state, nr_feat), dtype='i')
|
scores, bp_scores = vec2scores.begin_update(vector, drop=drop)
|
||||||
np_d_scores = numpy.zeros((nr_state, nr_class), dtype='f')
|
|
||||||
token_ids = np_token_ids
|
|
||||||
d_scores = np_d_scores
|
|
||||||
for step in next_step:
|
|
||||||
i = step.first
|
|
||||||
st = step.second
|
|
||||||
self._parse_step(st, &token_ids[i, 0],
|
|
||||||
&scores[i, 0], &is_valid[i, 0],
|
|
||||||
feat_weights, nr_class, nr_feat)
|
|
||||||
cpu_log_loss(&d_scores[i, 0],
|
|
||||||
&costs[i, 0], &is_valid[i, 0], &scores[i, 0], nr_class)
|
|
||||||
backprops.append((
|
|
||||||
get_async(cuda_stream, np_token_ids),
|
|
||||||
get_async(cuda_stream, np_d_scores)))
|
|
||||||
this_step, next_step = next_step, this_step
|
|
||||||
next_step.clear()
|
|
||||||
for step in this_step:
|
|
||||||
i = step.first
|
|
||||||
st = step.second
|
|
||||||
if not st.is_final():
|
|
||||||
next_step.push_back(step)
|
|
||||||
self.moves.set_costs(&is_valid[i, 0], &costs[i, 0],
|
|
||||||
states[i], golds[i])
|
|
||||||
cuda_stream.synchronize()
|
|
||||||
for gpu_token_ids, gpu_d_scores in backprops:
|
|
||||||
d_features = bp_hiddens((gpu_d_scores, gpu_token_ids), sgd)
|
|
||||||
d_features *= (gpu_token_ids >= 0).reshape((nr_state, nr_feat, 1))
|
|
||||||
|
|
||||||
xp = self.model[0].ops.xp
|
d_scores = self.get_batch_loss(states, golds, scores)
|
||||||
if hasattr(xp, 'scatter_add'):
|
d_vector = bp_scores(d_scores, sgd=sgd)
|
||||||
xp.scatter_add(d_tokvecs, gpu_token_ids, d_features)
|
|
||||||
|
if isinstance(self.model[0].ops, CupyOps) \
|
||||||
|
and not isinstance(token_ids, state2vec.ops.xp.ndarray):
|
||||||
|
# Move token_ids and d_vector to CPU, asynchronously
|
||||||
|
backprops.append((
|
||||||
|
get_async(cuda_stream, token_ids),
|
||||||
|
get_async(cuda_stream, d_vector),
|
||||||
|
bp_vector
|
||||||
|
))
|
||||||
else:
|
else:
|
||||||
xp.add.at(d_tokvecs, gpu_token_ids, d_features)
|
backprops.append((token_ids, d_vector, bp_vector))
|
||||||
|
self.transition_batch(states, scores)
|
||||||
|
todo = [st for st in todo if not st[0].is_final()]
|
||||||
|
# Tells CUDA to block, so our async copies complete.
|
||||||
|
if cuda_stream is not None:
|
||||||
|
cuda_stream.synchronize()
|
||||||
|
d_tokvecs = state2vec.ops.allocate(tokvecs.shape)
|
||||||
|
xp = state2vec.ops.xp # Handle for numpy/cupy
|
||||||
|
for token_ids, d_vector, bp_vector in backprops:
|
||||||
|
d_state_features = bp_vector(d_vector, sgd=sgd)
|
||||||
|
active_feats = token_ids * (token_ids >= 0)
|
||||||
|
active_feats = active_feats.reshape((token_ids.shape[0], token_ids.shape[1], 1))
|
||||||
|
if hasattr(xp, 'scatter_add'):
|
||||||
|
xp.scatter_add(d_tokvecs,
|
||||||
|
token_ids, d_state_features * active_feats)
|
||||||
|
else:
|
||||||
|
xp.add.at(d_tokvecs,
|
||||||
|
token_ids, d_state_features * active_feats)
|
||||||
return self.model[0].ops.unflatten(d_tokvecs, [len(d) for d in docs])
|
return self.model[0].ops.unflatten(d_tokvecs, [len(d) for d in docs])
|
||||||
|
|
||||||
def get_batch_model(self, batch_size, tokvecs, stream, dropout):
|
def get_batch_model(self, batch_size, tokvecs, stream, dropout):
|
||||||
|
|
|
@ -17,9 +17,8 @@ def test_issue429(EN):
|
||||||
|
|
||||||
doc = EN('a')
|
doc = EN('a')
|
||||||
matcher = Matcher(EN.vocab)
|
matcher = Matcher(EN.vocab)
|
||||||
matcher.add('TEST', [{'ORTH': 'a'}], on_match=merge_phrases)
|
matcher.add('TEST', on_match=merge_phrases, [{'ORTH': 'a'}])
|
||||||
doc = EN.make_doc('a b c')
|
doc = EN.tokenizer('a b c')
|
||||||
|
|
||||||
EN.tagger(doc)
|
EN.tagger(doc)
|
||||||
matcher(doc)
|
matcher(doc)
|
||||||
EN.entity(doc)
|
EN.entity(doc)
|
||||||
|
|
|
@ -1,8 +1,8 @@
|
||||||
# coding: utf-8
|
# coding: utf-8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
from ..matcher import Matcher, PhraseMatcher
|
from ...matcher import Matcher, PhraseMatcher
|
||||||
from .util import get_doc
|
from ..util import get_doc
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user