Mirror of https://github.com/explosion/spaCy.git
WIP: Add fine-tuning logic to tagger model, re #1182
parent 7f876a7a82
commit 468c138ab3

spacy/_ml.py | 70 lines changed
Changes to spacy/_ml.py:

```diff
@@ -19,7 +19,7 @@ from thinc.api import FeatureExtracter, with_getitem
 from thinc.neural.pooling import Pooling, max_pool, mean_pool, sum_pool
 from thinc.neural._classes.attention import ParametricAttention
 from thinc.linear.linear import LinearModel
-from thinc.api import uniqued, wrap
+from thinc.api import uniqued, wrap, flatten_add_lengths
 
 from .attrs import ID, ORTH, LOWER, NORM, PREFIX, SUFFIX, SHAPE, TAG, DEP
 from .tokens.doc import Doc
```
```diff
@@ -53,6 +53,27 @@ def _logistic(X, drop=0.):
     return Y, logistic_bwd
 
 
+@layerize
+def add_tuples(X, drop=0.):
+    """Give inputs of sequence pairs, where each sequence is (vals, length),
+    sum the values, returning a single sequence.
+
+    If input is:
+    ((vals1, length), (vals2, length)
+    Output is:
+    (vals1+vals2, length)
+
+    vals are a single tensor for the whole batch.
+    """
+    (vals1, length1), (vals2, length2) = X
+    assert length1 == length2
+
+    def add_tuples_bwd(dY, sgd=None):
+        return (dY, dY)
+
+    return (vals1+vals2, length), add_tuples_bwd
+
+
 def _zero_init(model):
     def _zero_init_impl(self, X, y):
         self.W.fill(0)
```
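The new `add_tuples` layer sums two ragged-batch representations that share the same lengths. Note that as committed it returns the name `length`, which is never bound (the unpacked names are `length1` and `length2`), so the forward pass would raise a NameError — consistent with the WIP label. Below is a minimal standalone sketch of the intended behaviour, using plain numpy rather than Thinc's `@layerize`; the function is illustrative, not the library's API:

```python
import numpy

def add_tuples(X, drop=0.):
    """Sum two (values, lengths) pairs whose lengths agree.

    `vals` is a single 2d array covering every token in the batch;
    `lengths` records how many rows belong to each document.
    """
    (vals1, lengths1), (vals2, lengths2) = X
    assert list(lengths1) == list(lengths2)

    def add_tuples_bwd(dY, sgd=None):
        # The gradient of a sum passes through unchanged to both inputs.
        return (dY, dY)

    # The commit returns an undefined name `length`; `lengths1` is
    # presumably what was intended.
    return (vals1 + vals2, lengths1), add_tuples_bwd

# Usage: two documents of 2 and 3 tokens, 4-dimensional rows.
vals1 = numpy.ones((5, 4))
vals2 = 2 * numpy.ones((5, 4))
(summed, lengths), backprop = add_tuples(((vals1, [2, 3]), (vals2, [2, 3])))
assert summed.shape == (5, 4) and lengths == [2, 3]
```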
```diff
@@ -61,6 +82,7 @@ def _zero_init(model):
     model.W.fill(0.)
     return model
 
+
 @layerize
 def _preprocess_doc(docs, drop=0.):
     keys = [doc.to_array([LOWER]) for doc in docs]
@@ -72,7 +94,6 @@ def _preprocess_doc(docs, drop=0.):
     return (keys, vals, lengths), None
 
 
-
 def _init_for_precomputed(W, ops):
     if (W**2).sum() != 0.:
         return
@@ -80,6 +101,7 @@ def _init_for_precomputed(W, ops):
     ops.xavier_uniform_init(reshaped)
     W[:] = reshaped.reshape(W.shape)
 
+
 @describe.on_data(_set_dimensions_if_needed)
 @describe.attributes(
     nI=Dimension("Input size"),
```
```diff
@@ -323,6 +345,21 @@ def get_token_vectors(tokens_attrs_vectors, drop=0.):
     return vectors, backward
 
 
+def fine_tune(model1, combine=None):
+    def fine_tune_fwd(docs, drop=0.):
+        X1, bp_X1 = model1.begin_update(docs)
+        lengths = [len(doc) for doc in docs]
+        X2 = model1.ops.flatten(X1)
+
+        def fine_tune_bwd(d_output, sgd=None):
+            bp_X1(d_output, sgd=sgd)
+            return d_output
+
+        return (X1+X2, lengths), fine_tune_bwd
+    model = wrap(fine_tune_fwd)
+    return model
+
+
 @layerize
 def flatten(seqs, drop=0.):
     if isinstance(seqs[0], numpy.ndarray):
```
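`fine_tune` wraps an already-trained per-token model (`model1`) so that its output is re-exposed as an `(array, lengths)` pair and the downstream gradient is routed back into it — this is what lets the tagger keep updating ("fine-tune") the shared token-vector model. As committed it also flattens `X1` into `X2` and returns `X1 + X2`, and the `combine` argument is unused, so the combination step is clearly unfinished. Below is a simplified sketch of the wrapper's shape with a stand-in encoder; `DummyEncoder` and the simplified return value are illustrative assumptions, not spaCy/Thinc API:

```python
import numpy

class DummyEncoder:
    """Stand-in for a pretrained per-token encoder (illustrative only)."""
    width = 4

    def begin_update(self, docs, drop=0.):
        # One row per token, concatenated across the whole batch.
        n_tokens = sum(len(doc) for doc in docs)
        output = numpy.zeros((n_tokens, self.width))

        def backprop(d_output, sgd=None):
            # A real encoder would use d_output to update its weights here.
            return d_output

        return output, backprop

def fine_tune(model1):
    """Re-expose model1's per-token output as (array, lengths) and route the
    downstream gradient back into model1 (simplified: no X1 + X2 doubling)."""
    def fine_tune_fwd(docs, drop=0.):
        X1, bp_X1 = model1.begin_update(docs)
        lengths = [len(doc) for doc in docs]

        def fine_tune_bwd(d_output, sgd=None):
            bp_X1(d_output, sgd=sgd)
            return d_output

        return (X1, lengths), fine_tune_bwd

    return fine_tune_fwd

docs = ["two words".split(), "three more words".split()]
(arr, lengths), backprop = fine_tune(DummyEncoder())(docs)
assert arr.shape == (5, 4) and lengths == [2, 3]
```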
```diff
@@ -370,6 +407,35 @@ def preprocess_doc(docs, drop=0.):
     return (keys, vals, lengths), None
 
 
+def build_tagger_model(nr_class, token_vector_width, **cfg):
+    with Model.define_operators({'>>': chain, '+': add}):
+        # Input: (doc, tensor) tuples
+        embed_docs = with_getitem(0,
+            FeatureExtracter([NORM])
+            >> HashEmbed(token_vector_width, 1000)
+            >> flatten_add_lengths
+        )
+
+        model = (
+            fine_tune(embed_docs)
+            >>
+            with_getitem(0,
+                FeatureExtracter([NORM])
+                >> HashEmbed(token_vector_width, 1000)
+                >> flatten_add_lengths
+            )
+            >> with_getitem(1,
+                flatten_add_lengths)
+            >> add_tuples
+            >> with_flatten(
+                Maxout(token_vector_width, token_vector_width)
+                >> Softmax(nr_class, token_vector_width)
+            )
+        )
+    return model
+
+
+
 def build_text_classifier(nr_class, width=64, **cfg):
     nr_vector = cfg.get('nr_vector', 200)
     with Model.define_operators({'>>': chain, '+': add, '|': concatenate, '**': clone}):
```
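`build_tagger_model` is the new network factory: it hashes `NORM` features of each doc into learned embeddings, adds them to the pretrained per-token tensor (the second element of each `(doc, tensor)` input pair) via `add_tuples`, and runs the summed vectors through a Maxout hidden layer and a per-token Softmax over the tag classes. The duplicated `with_getitem(0, ...)` pathway around `fine_tune(embed_docs)` looks like leftover scaffolding, again in line with the WIP label. The following is a rough numpy sketch of the intended forward computation only; the function, the parameter names, and the ReLU standing in for Maxout are all illustrative assumptions:

```python
import numpy

def tagger_forward(token_ids, pretrained_tensor, embed_table, W_hidden, W_out):
    """Conceptual forward pass of the tagger being assembled here:
    learned embeddings + pretrained per-token tensor -> hidden layer ->
    softmax. All parameters are stand-ins, not spaCy's actual weights."""
    embedded = embed_table[token_ids]              # (n_tokens, width)
    summed = embedded + pretrained_tensor          # the add_tuples step
    hidden = numpy.maximum(summed @ W_hidden, 0.)  # stand-in for the Maxout layer
    logits = hidden @ W_out                        # per-token class scores
    exp = numpy.exp(logits - logits.max(axis=-1, keepdims=True))
    return exp / exp.sum(axis=-1, keepdims=True)   # softmax over tag classes

width, n_tags, n_tokens, vocab = 4, 3, 5, 10
rng = numpy.random.default_rng(0)
probs = tagger_forward(
    rng.integers(0, vocab, n_tokens),
    rng.normal(size=(n_tokens, width)),
    rng.normal(size=(vocab, width)),
    rng.normal(size=(width, width)),
    rng.normal(size=(width, n_tags)),
)
assert probs.shape == (n_tokens, n_tags)
assert numpy.allclose(probs.sum(axis=-1), 1.0)
```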
The remaining hunks belong to a second file in the commit (its name is not shown in this view); it imports from `._ml` and defines the `NeuralTagger` and `NeuralLabeller` pipeline components:

```diff
@@ -42,7 +42,7 @@ from .compat import json_dumps
 
 from .attrs import ID, LOWER, PREFIX, SUFFIX, SHAPE, TAG, DEP, POS
 from ._ml import rebatch, Tok2Vec, flatten, get_col, doc2feats
-from ._ml import build_text_classifier
+from ._ml import build_text_classifier, build_tagger_model
 from .parts_of_speech import X
 
 
```
```diff
@@ -346,9 +346,7 @@ class NeuralTagger(BaseThincComponent):
 
     @classmethod
     def Model(cls, n_tags, token_vector_width):
-        return with_flatten(
-            chain(Maxout(token_vector_width, token_vector_width),
-                  Softmax(n_tags, token_vector_width)))
+        return build_tagger_model(n_tags, token_vector_width)
 
     def use_params(self, params):
         with self.model.use_params(params):
@@ -455,9 +453,7 @@ class NeuralLabeller(NeuralTagger):
 
     @classmethod
     def Model(cls, n_tags, token_vector_width):
-        return with_flatten(
-            chain(Maxout(token_vector_width, token_vector_width),
-                  Softmax(n_tags, token_vector_width)))
+        return build_tagger_model(n_tags, token_vector_width)
 
     def get_loss(self, docs, golds, scores):
         scores = self.model.ops.flatten(scores)
```
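Both `NeuralTagger.Model` and `NeuralLabeller.Model` previously built their network inline as `with_flatten(chain(Maxout(...), Softmax(...)))` over precomputed token vectors; after this commit they delegate to `build_tagger_model`, so the tagger carries its own embedding and fine-tuning pathway. For context, here is a rough sketch of what the removed `with_flatten` wrapper does conceptually, in plain numpy; this is not Thinc's implementation:

```python
import numpy

def with_flatten_apply(layer, seqs):
    """Sketch of the with_flatten(...) idea: concatenate per-doc arrays,
    run the layer once over the whole batch, then split back up."""
    lengths = [len(seq) for seq in seqs]
    flat = numpy.concatenate(seqs)          # (total_tokens, width)
    flat_out = layer(flat)                  # one dense pass over all tokens
    splits = numpy.cumsum(lengths)[:-1]     # boundaries between documents
    return numpy.split(flat_out, splits)

# Usage: a stand-in for the "Maxout >> Softmax" layer as a plain function.
width, n_tags = 4, 3
W = numpy.ones((width, n_tags))
outs = with_flatten_apply(
    lambda X: X @ W,
    [numpy.ones((2, width)), numpy.ones((3, width))],
)
assert [o.shape for o in outs] == [(2, n_tags), (3, n_tags)]
```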