Mirror of https://github.com/explosion/spaCy.git, synced 2025-11-04 01:48:04 +03:00
	Merge branch 'develop' of https://github.com/explosion/spaCy into develop
Commit 931509d96a

1 changed file: spacy/_ml.py (82 changes)
@@ -5,6 +5,7 @@ from thinc.neural._classes.hash_embed import HashEmbed
from thinc.neural.ops import NumpyOps, CupyOps
from thinc.neural.util import get_array_module
import random
import cytoolz

from thinc.neural._classes.convolution import ExtractWindow
from thinc.neural._classes.static_vectors import StaticVectors
@@ -25,6 +26,7 @@ from thinc.api import uniqued, wrap, flatten_add_lengths

from .attrs import ID, ORTH, LOWER, NORM, PREFIX, SUFFIX, SHAPE, TAG, DEP
from .tokens.doc import Doc
from . import util

import numpy
import io
@@ -55,6 +57,27 @@ def _logistic(X, drop=0.):
    return Y, logistic_bwd


@layerize
def add_tuples(X, drop=0.):
    """Give inputs of sequence pairs, where each sequence is (vals, length),
    sum the values, returning a single sequence.

    If input is:
    ((vals1, length), (vals2, length)
    Output is:
    (vals1+vals2, length)

    vals are a single tensor for the whole batch.
    """
    (vals1, length1), (vals2, length2) = X
    assert length1 == length2

    def add_tuples_bwd(dY, sgd=None):
        return (dY, dY)

    return (vals1+vals2, length), add_tuples_bwd


def _zero_init(model):
    def _zero_init_impl(self, X, y):
        self.W.fill(0)
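As a reading aid for the (vals, length) format that add_tuples works on, here is a minimal numpy-only sketch; the array shapes and values are invented for illustration, and plain numpy stands in for thinc's ops.

    import numpy

    # `vals` is one tensor for the whole batch; `lengths` records how many
    # rows belong to each sequence in that batch.
    vals1 = numpy.arange(12, dtype='f').reshape(6, 2)   # 6 tokens x 2 dims
    vals2 = numpy.ones((6, 2), dtype='f')
    lengths = numpy.asarray([4, 2])                      # doc 1: 4 tokens, doc 2: 2 tokens

    # Forward pass: element-wise sum of the values; the lengths are unchanged.
    summed = vals1 + vals2

    # Backward pass: the gradient of a sum flows unchanged to both inputs,
    # which is what add_tuples_bwd expresses by returning (dY, dY).
    d_summed = numpy.ones_like(summed)
    d_vals1, d_vals2 = d_summed, d_summed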
@@ -63,6 +86,7 @@ def _zero_init(model):
        model.W.fill(0.)
    return model


@layerize
def _preprocess_doc(docs, drop=0.):
    keys = [doc.to_array([LOWER]) for doc in docs]
@@ -74,7 +98,6 @@ def _preprocess_doc(docs, drop=0.):
    return (keys, vals, lengths), None



def _init_for_precomputed(W, ops):
    if (W**2).sum() != 0.:
        return
@@ -82,6 +105,7 @@ def _init_for_precomputed(W, ops):
    ops.xavier_uniform_init(reshaped)
    W[:] = reshaped.reshape(W.shape)


@describe.on_data(_set_dimensions_if_needed)
@describe.attributes(
    nI=Dimension("Input size"),
@@ -186,8 +210,19 @@ class PrecomputableMaxouts(Model):
        return Yfp, backward


def drop_layer(layer, factor=2.):
    def drop_layer_fwd(X, drop=0.):
        drop *= factor
        mask = layer.ops.get_dropout_mask((1,), drop)
        if mask is None or mask > 0:
            return layer.begin_update(X, drop=drop)
        else:
            return X, lambda dX, sgd=None: dX
    return wrap(drop_layer_fwd, layer)


def Tok2Vec(width, embed_size, preprocess=None):
    cols = [ID, NORM, PREFIX, SUFFIX, SHAPE]
    cols = [ID, NORM, PREFIX, SUFFIX, SHAPE, ORTH]
    with Model.define_operators({'>>': chain, '|': concatenate, '**': clone, '+': add}):
        norm = get_col(cols.index(NORM))     >> HashEmbed(width, embed_size, name='embed_lower')
        prefix = get_col(cols.index(PREFIX)) >> HashEmbed(width, embed_size//2, name='embed_prefix')
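drop_layer wraps a layer so that, during training, the whole layer is sometimes skipped and the input passes through untouched. The stand-alone sketch below uses hypothetical helper names and plain numpy in place of thinc's ops and wrap; it only shows the coin-flip behaviour, while the real wrapper draws the flip from layer.ops.get_dropout_mask.

    import numpy

    def drop_layer_sketch(apply_layer, X, drop=0., factor=2.):
        # Hypothetical illustration of the idea: with probability drop * factor
        # the layer is skipped and X is returned unchanged (so the backward
        # pass is the identity); otherwise the wrapped layer runs as usual.
        p_skip = min(drop * factor, 1.0)
        if p_skip and numpy.random.uniform() < p_skip:
            return X
        return apply_layer(X)

    # Example: a toy "layer" that doubles its input, skipped about half the time.
    X = numpy.ones((3, 4), dtype='f')
    Y = drop_layer_sketch(lambda x: 2 * x, X, drop=0.25, factor=2.)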
@@ -299,7 +334,8 @@ def zero_init(model):


def doc2feats(cols=None):
    cols = [ID, NORM, PREFIX, SUFFIX, SHAPE]
    if cols is None:
        cols = [ID, NORM, PREFIX, SUFFIX, SHAPE, ORTH]
    def forward(docs, drop=0.):
        feats = []
        for doc in docs:
@@ -336,25 +372,22 @@ def fine_tune(embedding, combine=None):
        vecs, bp_vecs = embedding.begin_update(docs, drop=drop)
        flat_tokvecs = embedding.ops.flatten(tokvecs)
        flat_vecs = embedding.ops.flatten(vecs)
        alpha = model.mix
        minus = 1-model.mix
        output = embedding.ops.unflatten(
                   (alpha * flat_tokvecs + minus * flat_vecs), lengths)
                   (model.mix[0] * flat_vecs + model.mix[1] * flat_tokvecs),
                    lengths)

        def fine_tune_bwd(d_output, sgd=None):
            bp_vecs(d_output, sgd=sgd)
            flat_grad = model.ops.flatten(d_output)
            model.d_mix += flat_tokvecs.dot(flat_grad.T).sum()
            model.d_mix += 1-flat_vecs.dot(flat_grad.T).sum()

            bp_vecs([d_o * minus for d_o in d_output], sgd=sgd)
            d_output = [d_o * alpha for d_o in d_output]
            model.d_mix[1] += flat_tokvecs.dot(flat_grad.T).sum()
            model.d_mix[0] += flat_vecs.dot(flat_grad.T).sum()
            if sgd is not None:
                sgd(model._mem.weights, model._mem.gradient, key=model.id)
            model.mix = model.ops.xp.minimum(model.mix, 1.0)
            return d_output
        return output, fine_tune_bwd
    model = wrap(fine_tune_fwd, embedding)
    model.mix = model._mem.add((model.id, 'mix'), (1,))
    model.mix.fill(0.0)
    model.mix = model._mem.add((model.id, 'mix'), (2,))
    model.mix.fill(1.)
    model.d_mix = model._mem.add_gradient((model.id, 'd_mix'), (model.id, 'mix'))
    return model
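To make the new mixing arithmetic in fine_tune easier to follow, here is a minimal numpy sketch of the forward blend and the d_mix updates; the shapes are invented for illustration and plain numpy stands in for the model's ops.

    import numpy

    mix = numpy.asarray([1., 1.], dtype='f')               # 2-element weight, fill(1.)
    flat_vecs = numpy.random.randn(5, 8).astype('f')       # freshly embedded vectors
    flat_tokvecs = numpy.random.randn(5, 8).astype('f')    # pre-computed token vectors

    # Forward: a learned weighted sum of the two representations.
    output = mix[0] * flat_vecs + mix[1] * flat_tokvecs

    # Backward: each weight accumulates the correlation between its input
    # and the output gradient, mirroring the d_mix[0] / d_mix[1] updates.
    flat_grad = numpy.random.randn(5, 8).astype('f')
    d_mix = numpy.zeros_like(mix)
    d_mix[1] += flat_tokvecs.dot(flat_grad.T).sum()
    d_mix[0] += flat_vecs.dot(flat_grad.T).sum()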
@@ -405,6 +438,27 @@ def preprocess_doc(docs, drop=0.):
    vals = ops.allocate(keys.shape[0]) + 1
    return (keys, vals, lengths), None

def getitem(i):
    def getitem_fwd(X, drop=0.):
        return X[i], None
    return layerize(getitem_fwd)

def build_tagger_model(nr_class, token_vector_width, **cfg):
    embed_size = util.env_opt('embed_size', 7500)
    with Model.define_operators({'>>': chain, '+': add}):
        # Input: (doc, tensor) tuples
        private_tok2vec = Tok2Vec(token_vector_width, embed_size, preprocess=doc2feats())

        model = (
            fine_tune(private_tok2vec)
            >> with_flatten(
                Maxout(token_vector_width, token_vector_width)
                >> Softmax(nr_class, token_vector_width)
            )
        )
    model.nI = None
    return model


def build_text_classifier(nr_class, width=64, **cfg):
    nr_vector = cfg.get('nr_vector', 200)
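For orientation, here is a plain-Python sketch of what the `>>` (chain) composition in build_tagger_model amounts to: each stage feeds the next, so the tagger is roughly tok2vec, then a per-token Maxout, then a per-token Softmax. The stand-in layers below are hypothetical and only show the data flow, not the real thinc components.

    import numpy

    def chain_sketch(*layers):
        # Feed the output of each stage into the next, like thinc's `>>`.
        def forward(X):
            for layer in layers:
                X = layer(X)
            return X
        return forward

    # Hypothetical stand-ins: 128-dim "token vectors", a 2-piece maxout that
    # halves the width, and a softmax over the resulting scores.
    tok2vec = lambda tokens: numpy.random.randn(len(tokens), 128).astype('f')
    maxout = lambda X: numpy.maximum(X[:, :64], X[:, 64:])
    softmax = lambda X: numpy.exp(X) / numpy.exp(X).sum(axis=-1, keepdims=True)

    tagger = chain_sketch(tok2vec, maxout, softmax)
    scores = tagger(['This', 'is', 'a', 'sketch'])   # one probability row per token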
			
			