Merge pull request #2104 from explosion/feature/single-thread

Update parser for Thinc 6.11.0
2025-11-25 04:16:11 +03:00 · 2018-03-16 04:28:56 +01:00 · 2018-03-16 04:28:56 +01:00 · 3cdfe1ee4d
commit 3cdfe1ee4d
parent 0d17377e8b 39c50225e8
6 changed files with 64 additions and 49 deletions
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,8 +3,9 @@ pathlib
 numpy>=1.7
 cymem>=1.30,<1.32
 preshed>=1.0.0,<2.0.0
-thinc>=6.10.1,<6.11.0
+thinc>=6.11.1.dev2,<6.12.0
 murmurhash>=0.28,<0.29
+cytoolz>=0.9.0,<0.10.0
 plac<1.0.0,>=0.9.6
 ujson>=1.35
 dill>=0.2,<0.3
--- a/setup.py
+++ b/setup.py
@@ -190,7 +190,7 @@ def setup_package():
                'murmurhash>=0.28,<0.29',
                'cymem>=1.30,<1.32',
                'preshed>=1.0.0,<2.0.0',
-                'thinc>=6.10.1,<6.11.0',
+                'thinc>=6.11.1.dev2,<6.12.0',
                'plac<1.0.0,>=0.9.6',
                'pathlib',
                'ujson>=1.35',
--- a/spacy/_ml.py
+++ b/spacy/_ml.py
@@ -144,8 +144,8 @@ class PrecomputableAffine(Model):
        self.nF = nF

    def begin_update(self, X, drop=0.):
-        Yf = self.ops.xp.dot(X,
-            self.W.reshape((self.nF*self.nO*self.nP, self.nI)).T)
+        Yf = self.ops.gemm(X,
+            self.W.reshape((self.nF*self.nO*self.nP, self.nI)), trans2=True)
        Yf = Yf.reshape((Yf.shape[0], self.nF, self.nO, self.nP))
        Yf = self._add_padding(Yf)

@@ -161,11 +161,11 @@ class PrecomputableAffine(Model):
            Wopfi = self.W.transpose((1, 2, 0, 3))
            Wopfi = self.ops.xp.ascontiguousarray(Wopfi)
            Wopfi = Wopfi.reshape((self.nO*self.nP, self.nF * self.nI))
-            dXf = self.ops.dot(dY.reshape((dY.shape[0], self.nO*self.nP)), Wopfi)
+            dXf = self.ops.gemm(dY.reshape((dY.shape[0], self.nO*self.nP)), Wopfi)

            # Reuse the buffer
            dWopfi = Wopfi; dWopfi.fill(0.)
-            self.ops.xp.dot(dY.T, Xf, out=dWopfi)
+            self.ops.gemm(dY, Xf, out=dWopfi, trans1=True)
            dWopfi = dWopfi.reshape((self.nO, self.nP, self.nF, self.nI))
            # (o, p, f, i) --> (f, o, p, i)
            self.d_W += dWopfi.transpose((2, 0, 1, 3))
--- a/spacy/cli/ud_train.py
+++ b/spacy/cli/ud_train.py
@@ -13,6 +13,7 @@ import spacy
 import spacy.util
 from ..tokens import Token, Doc
 from ..gold import GoldParse
+from ..util import compounding
 from ..syntax.nonproj import projectivize
 from ..matcher import Matcher
 from collections import defaultdict, Counter
@@ -36,7 +37,7 @@ lang.ja.Japanese.Defaults.use_janome = False
 random.seed(0)
 numpy.random.seed(0)

-def minibatch_by_words(items, size=5000):
+def minibatch_by_words(items, size):
    random.shuffle(items)
    if isinstance(size, int):
        size_ = itertools.repeat(size)
@@ -368,9 +369,10 @@ def main(ud_dir, parses_dir, config, corpus, limit=0):

    optimizer = initialize_pipeline(nlp, docs, golds, config)

+    batch_sizes = compounding(config.batch_size //10, config.batch_size, 1.001)
    for i in range(config.nr_epoch):
        docs = [nlp.make_doc(doc.text) for doc in docs]
-        batches = minibatch_by_words(list(zip(docs, golds)), size=config.batch_size)
+        batches = minibatch_by_words(list(zip(docs, golds)), size=batch_sizes)
        losses = {}
        n_train_words = sum(len(doc) for doc in docs)
        with tqdm.tqdm(total=n_train_words, leave=False) as pbar:
--- a/spacy/syntax/nn_parser.pxd
+++ b/spacy/syntax/nn_parser.pxd
@@ -15,7 +15,7 @@ cdef class Parser:
    cdef readonly object cfg
    cdef public object _multitasks

-    cdef void _parseC(self, StateC* state, 
+    cdef void _parseC(self, StateC** states, int nr_task, 
            const float* feat_weights, const float* bias,
            const float* hW, const float* hb,
            int nr_class, int nr_hidden, int nr_feat, int nr_piece) nogil
--- a/spacy/syntax/nn_parser.pyx
+++ b/spacy/syntax/nn_parser.pyx
@@ -1,7 +1,6 @@
 # cython: infer_types=True
 # cython: cdivision=True
 # cython: boundscheck=False
-# cython: profile=True
 # coding: utf-8
 from __future__ import unicode_literals, print_function

@@ -29,6 +28,8 @@ from thinc.neural.ops import CupyOps
 from thinc.neural.util import get_array_module
 from thinc.linalg cimport Vec, VecVec

+from thinc.linalg cimport MatVec, VecVec
+
 from .._ml import zero_init, PrecomputableAffine, Tok2Vec, flatten
 from .._ml import link_vectors_to_models, create_default_optimizer
 from ..compat import json_dumps, copy_array
@@ -171,8 +172,8 @@ cdef void sum_state_features(float* output,
            else:
                idx = token_ids[f] * F * O + f*O
                feature = &cached[idx]
-            for i in range(O):
-                output[i] += feature[i]
+            VecVec.add_i(output,
+                feature, 1., O)
        output += O
        token_ids += F

@@ -265,7 +266,7 @@ cdef class Parser:

        with Model.use_device('cpu'):
            upper = chain(
-                clone(LayerNorm(Maxout(hidden_width, hidden_width)), depth-1),
+                clone(Maxout(hidden_width, hidden_width), depth-1),
                zero_init(Affine(nr_class, hidden_width, drop_factor=0.0))
            )

@@ -422,59 +423,70 @@ cdef class Parser:
        cdef int nr_hidden = hidden_weights.shape[0]
        cdef int nr_task = states.size()
        with nogil:
-            for i in range(nr_task):
-                self._parseC(states[i],
-                    feat_weights, bias, hW, hb,
-                    nr_class, nr_hidden, nr_feat, nr_piece)
+            self._parseC(&states[0], nr_task, feat_weights, bias, hW, hb,
+                nr_class, nr_hidden, nr_feat, nr_piece)
        PyErr_CheckSignals()
        tokvecs = self.model[0].ops.unflatten(tokvecs,
                                    [len(doc) for doc in docs])
        return state_objs, tokvecs

-    cdef void _parseC(self, StateC* state, 
+    cdef void _parseC(self, StateC** states, int nr_task, 
            const float* feat_weights, const float* bias,
            const float* hW, const float* hb,
            int nr_class, int nr_hidden, int nr_feat, int nr_piece) nogil:
        token_ids = <int*>calloc(nr_feat, sizeof(int))
        is_valid = <int*>calloc(nr_class, sizeof(int))
-        vectors = <float*>calloc(nr_hidden * nr_piece, sizeof(float))
-        scores = <float*>calloc(nr_class, sizeof(float))
+        vectors = <float*>calloc(nr_hidden * nr_task, sizeof(float))
+        unmaxed = <float*>calloc(nr_hidden * nr_piece, sizeof(float))
+        scores = <float*>calloc(nr_class*nr_task, sizeof(float))
        if not (token_ids and is_valid and vectors and scores):
            with gil:
                PyErr_SetFromErrno(MemoryError)
                PyErr_CheckSignals()
-        cdef float feature
-        while not state.is_final():
-            state.set_context_tokens(token_ids, nr_feat)
-            memset(vectors, 0, nr_hidden * nr_piece * sizeof(float))
-            memset(scores, 0, nr_class * sizeof(float))
-            sum_state_features(vectors,
-                feat_weights, token_ids, 1, nr_feat, nr_hidden * nr_piece)
-            for i in range(nr_hidden * nr_piece):
-                vectors[i] += bias[i]
-            V = vectors
-            W = hW
-            for i in range(nr_hidden):
-                if nr_piece == 1:
-                    feature = V[0] if V[0] >= 0. else 0.
-                elif nr_piece == 2:
-                    feature = V[0] if V[0] >= V[1] else V[1]
-                else:
-                    feature = Vec.max(V, nr_piece)
-                for j in range(nr_class):
-                    scores[j] += feature * W[j]
-                W += nr_class
-                V += nr_piece
-            for i in range(nr_class):
-                scores[i] += hb[i]
-            self.moves.set_valid(is_valid, state)
-            guess = arg_max_if_valid(scores, is_valid, nr_class)
-            action = self.moves.c[guess]
-            action.do(state, action.label)
-            state.push_hist(guess)
+        cdef int nr_todo = nr_task
+        cdef int i, j
+        cdef vector[StateC*] unfinished
+        while nr_todo >= 1:
+            memset(vectors, 0, nr_todo * nr_hidden * sizeof(float))
+            memset(scores, 0, nr_todo * nr_class * sizeof(float))
+            for i in range(nr_todo):
+                state = states[i]
+                state.set_context_tokens(token_ids, nr_feat)
+                memset(unmaxed, 0, nr_hidden * nr_piece * sizeof(float))
+                sum_state_features(unmaxed,
+                    feat_weights, token_ids, 1, nr_feat, nr_hidden * nr_piece)
+                VecVec.add_i(unmaxed,
+                    bias, 1., nr_hidden*nr_piece)
+                state_vector = &vectors[i*nr_hidden]
+                for j in range(nr_hidden):
+                    index = j * nr_piece
+                    which = Vec.arg_max(&unmaxed[index], nr_piece)
+                    state_vector[j] = unmaxed[index + which]
+            # Compute hidden-to-output
+            MatVec.batch_dot(scores,
+                hW, vectors, nr_class, nr_hidden, nr_todo)
+            # Add bias
+            for i in range(nr_todo):
+                VecVec.add_i(&scores[i*nr_class],
+                    hb, 1., nr_class)
+            # Validate actions, argmax, take action.
+            for i in range(nr_todo):
+                state = states[i]
+                self.moves.set_valid(is_valid, state)
+                guess = arg_max_if_valid(&scores[i*nr_class], is_valid, nr_class)
+                action = self.moves.c[guess]
+                action.do(state, action.label)
+                state.push_hist(guess)
+                if not state.is_final():
+                    unfinished.push_back(state)
+            for i in range(unfinished.size()):
+                states[i] = unfinished[i]
+            nr_todo = unfinished.size()
+            unfinished.clear()
        free(token_ids)
        free(is_valid)
        free(vectors)
+        free(unmaxed)
        free(scores)

    def beam_parse(self, docs, int beam_width=3, float beam_density=0.001,