Tmp GPU code

2025-05-28 09:43:17 +03:00 · 2017-05-07 11:04:24 -05:00 · 2017-05-07 11:04:24 -05:00 · 6782eedf9b
commit 6782eedf9b
parent e420e5a809
5 changed files with 50 additions and 25 deletions
--- a/bin/parser/train_ud.py
+++ b/bin/parser/train_ud.py
@@ -26,6 +26,15 @@ try:
 except ImportError:
    pass

+from thinc.neural import Model
+
+
+try:
+    import cupy
+    from thinc.neural.ops import CupyOps
+except:
+    cupy = None
+

 def read_conllx(loc, n=0):
    with io.open(loc, 'r', encoding='utf8') as file_:
@@ -88,6 +97,8 @@ def organize_data(vocab, train_sents):


 def main(lang_name, train_loc, dev_loc, model_dir, clusters_loc=None):
+    if cupy is not None:
+        Model.ops = CupyOps()
    LangClass = spacy.util.get_lang_class(lang_name)
    train_sents = list(read_conllx(train_loc))
    dev_sents = list(read_conllx(dev_loc))
@@ -163,7 +174,7 @@ def main(lang_name, train_loc, dev_loc, model_dir, clusters_loc=None):
            print('%d:\t%.3f\t%.3f\t%.3f' % (itn, nn_loss[-1], scorer.uas, scorer.tags_acc))
            nn_loss.append(0.)
        trainer.each_epoch.append(track_progress)
-        trainer.batch_size = 12
+        trainer.batch_size = 24
        trainer.nb_epoch = 10
        for docs, golds in trainer.iterate(Xs, ys):
            docs = [Doc(vocab, words=[w.text for w in doc]) for doc in docs]
--- a/spacy/pipeline.pyx
+++ b/spacy/pipeline.pyx
@@ -3,6 +3,7 @@ from __future__ import unicode_literals

 from thinc.api import chain, layerize, with_getitem
 from thinc.neural import Model, Softmax
+import numpy

 from .syntax.parser cimport Parser
 #from .syntax.beam_parser cimport BeamParser
@@ -39,16 +40,15 @@ class TokenVectorEncoder(object):
    def update(self, docs, golds, drop=0., sgd=None):
        scores, finish_update = self.tagger.begin_update(docs, drop=drop)
        losses = scores.copy()
-        loss = 0.0
        idx = 0
        for i, gold in enumerate(golds):
+            ids = numpy.zeros((len(gold),), dtype='i')
+            start = idx
            for j, tag in enumerate(gold.tags):
-                tag_id = docs[0].vocab.morphology.tag_names.index(tag)
-                losses[idx, tag_id] -= 1.0
-                loss += 1-scores[idx, tag_id]
+                ids[j] = docs[0].vocab.morphology.tag_names.index(tag)
                idx += 1
+            self.tagger.ops.xp.scatter_add(losses[start:idx], ids, -1.0)
        finish_update(losses, sgd)
-        return loss


 cdef class EntityRecognizer(Parser):
--- a/spacy/syntax/parser.pyx
+++ b/spacy/syntax/parser.pyx
@@ -201,7 +201,7 @@ cdef class Parser:
        costs = self.model.ops.allocate((len(docs), nr_class), dtype='f')
        gradients = self.model.ops.allocate((len(docs), nr_class), dtype='f')
        is_valid = self.model.ops.allocate((len(docs), nr_class), dtype='i')
-        attr_names = self.model.ops.allocate((2,), dtype='i')
+        attr_names = numpy.zeros((2,), dtype='i')
        attr_names[0] = TAG
        attr_names[1] = DEP

@@ -228,10 +228,14 @@ cdef class Parser:
            scores, finish_update = self._begin_update(states, tokvecs)
            token_ids, batch_token_grads = finish_update(golds, sgd=sgd, losses=losses,
                                                         force_gold=False)
-            for i, tok_ids in enumerate(token_ids):
-                for j, tok_i in enumerate(tok_ids):
-                    if tok_i >= 0:
-                        d_tokens[i][tok_i] += batch_token_grads[i, j]
+            if hasattr(self.model.ops.xp, 'scatter_add'):
+                for i, tok_ids in enumerate(token_ids):
+                    self.model.ops.xp.scatter_add(d_tokens[i],
+                        tok_ids, batch_token_grads[i])
+            else:
+                for i, tok_ids in enumerate(token_ids):
+                    self.model.ops.xp.add.at(d_tokens[i],
+                        tok_ids, batch_token_grads[i])

            self._transition_batch(states, scores)

@@ -244,7 +248,7 @@ cdef class Parser:

    def _begin_update(self, states, tokvecs, drop=0.):
        nr_class = self.moves.n_moves
-        attr_names = self.model.ops.allocate((2,), dtype='i')
+        attr_names = numpy.zeros((2,), dtype='i')
        attr_names[0] = TAG
        attr_names[1] = DEP

@@ -284,28 +288,38 @@ cdef class Parser:
            nF=1, nB=0, nS=2, nL=2, nR=2):
        n_tokens = states[0].nr_context_tokens(nF, nB, nS, nL, nR)
        vector_length = all_tokvecs[0].shape[1]
-        tokens = self.model.ops.allocate((len(states), n_tokens), dtype='int32')
-        features = self.model.ops.allocate((len(states), n_tokens, attr_names.shape[0]), dtype='uint64')
+        cpu_tokens = numpy.zeros((len(states), n_tokens), dtype='int32')
+        features = numpy.zeros((len(states), n_tokens, attr_names.shape[0]), dtype='uint64')
        tokvecs = self.model.ops.allocate((len(states), n_tokens, vector_length), dtype='f')
        for i, state in enumerate(states):
-            state.set_context_tokens(tokens[i], nF, nB, nS, nL, nR)
-            state.set_attributes(features[i], tokens[i], attr_names)
-            state.set_token_vectors(tokvecs[i], all_tokvecs[i], tokens[i])
-        return (tokens, features, tokvecs)
+            state.set_context_tokens(cpu_tokens[i], nF, nB, nS, nL, nR)
+            #state.set_attributes(features[i], tokens[i], attr_names)
+        gpu_tokens = self.model.ops.xp.array(cpu_tokens)
+        for i in range(len(states)):
+            tokvecs[i] = all_tokvecs[i][gpu_tokens[i]]
+        tokvecs *= (gpu_tokens >= 0).reshape((gpu_tokens.shape[0], gpu_tokens.shape[1], 1))
+        return (gpu_tokens, self.model.ops.asarray(features), tokvecs)

-    def _validate_batch(self, int[:, ::1] is_valid, states):
+    def _validate_batch(self, is_valid, states):
        cdef StateClass state
        cdef int i
+        cdef int[:, :] is_valid_cpu = is_valid.get()
        for i, state in enumerate(states):
-            self.moves.set_valid(&is_valid[i, 0], state.c)
+            self.moves.set_valid(&is_valid_cpu[i, 0], state.c)
+        is_valid.set(numpy.asarray(is_valid_cpu))

-    def _cost_batch(self, weight_t[:, ::1] costs, int[:, ::1] is_valid,
+    def _cost_batch(self, costs, is_valid,
            states, golds):
        cdef int i
        cdef StateClass state
        cdef GoldParse gold
+        cdef int[:, :] is_valid_cpu = is_valid.get()
+        cdef weight_t[:, :] costs_cpu = costs.get()
+
        for i, (state, gold) in enumerate(zip(states, golds)):
-            self.moves.set_costs(&is_valid[i, 0], &costs[i, 0], state, gold)
+            self.moves.set_costs(&is_valid_cpu[i, 0], &costs_cpu[i, 0], state, gold)
+        is_valid.set(numpy.asarray(is_valid_cpu))
+        costs.set(numpy.asarray(costs_cpu))

    def _transition_batch(self, states, scores):
        cdef StateClass state
--- a/spacy/syntax/stateclass.pyx
+++ b/spacy/syntax/stateclass.pyx
@@ -76,8 +76,8 @@ cdef class StateClass:
            else:
                vals[i] = 0

-    def set_token_vectors(self, float[:, :] tokvecs,
-            float[:, :] all_tokvecs, int[:] indices):
+    def set_token_vectors(self, tokvecs,
+            all_tokvecs, int[:] indices):
        for i in range(indices.shape[0]):
            if indices[i] >= 0:
                tokvecs[i] = all_tokvecs[indices[i]]
--- a/spacy/tokens/doc.pxd
+++ b/spacy/tokens/doc.pxd
@@ -32,7 +32,7 @@ cdef class Doc:
    cdef public object _vector
    cdef public object _vector_norm

-    cdef public np.ndarray tensor
+    cdef public object tensor
    cdef public object user_data

    cdef TokenC* c