From d43a83e37a6ccb0087db8f77c916761d81c94afa Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Mon, 9 Oct 2017 03:35:40 +0200
Subject: [PATCH 1/3] Allow parser.add_label for pretrained models

---
 spacy/syntax/nn_parser.pyx | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx
index 153f7a484..daebcac7b 100644
--- a/spacy/syntax/nn_parser.pyx
+++ b/spacy/syntax/nn_parser.pyx
@@ -51,7 +51,7 @@ from .._ml import Tok2Vec, doc2feats, rebatch, fine_tune
 from .._ml import Residual, drop_layer, flatten
 from .._ml import link_vectors_to_models
 from .._ml import HistoryFeatures
-from ..compat import json_dumps
+from ..compat import json_dumps, copy_array
 
 from . import _parse_features
 from ._parse_features cimport CONTEXT_SIZE
@@ -781,12 +781,22 @@ cdef class Parser:
             self.moves.finalize_doc(doc)
 
     def add_label(self, label):
+        resized = False
         for action in self.moves.action_types:
             added = self.moves.add_action(action, label)
             if added:
                 # Important that the labels be stored as a list! We need the
                 # order, or the model goes out of synch
                 self.cfg.setdefault('extra_labels', []).append(label)
+                resized = True
+        if self.model not in (True, False, None) and resized:
+            # Weights are stored in (nr_out, nr_in) format, so we're basically
+            # just adding rows here.
+            smaller = self.model[-1]._layers[-1]
+            larger = Affine(self.moves.n_moves, smaller.nI)
+            copy_array(larger.W[:smaller.nO], smaller.W)
+            copy_array(larger.b[:smaller.nO], smaller.b)
+            self.model[-1]._layers[-1] = larger
 
     def begin_training(self, gold_tuples, pipeline=None, **cfg):
         if 'model' in cfg:

From b2b8506f2c8b984864dabb0daeafa4e86c079231 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Mon, 9 Oct 2017 03:35:57 +0200
Subject: [PATCH 2/3] Remove whitespace

---
 spacy/_ml.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/spacy/_ml.py b/spacy/_ml.py
index 23facb9fb..62e0ceb9a 100644
--- a/spacy/_ml.py
+++ b/spacy/_ml.py
@@ -743,5 +743,3 @@ def concatenate_lists(*layers, **kwargs): # pragma: no cover
         return ys, concatenate_lists_bwd
     model = wrap(concatenate_lists_fwd, concat)
     return model
-
-

From dde87e6b0d2de331e536d335ead00db5d181ee96 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Mon, 9 Oct 2017 03:42:35 +0200
Subject: [PATCH 3/3] Add tests for adding parser actions

---
 spacy/tests/parser/test_add_label.py | 68 ++++++++++++++++++++++++++++
 1 file changed, 68 insertions(+)
 create mode 100644 spacy/tests/parser/test_add_label.py

diff --git a/spacy/tests/parser/test_add_label.py b/spacy/tests/parser/test_add_label.py
new file mode 100644
index 000000000..b89cca113
--- /dev/null
+++ b/spacy/tests/parser/test_add_label.py
@@ -0,0 +1,68 @@
+'''Test the ability to add a label to a (potentially trained) parsing model.'''
+from __future__ import unicode_literals
+import pytest
+import numpy.random
+from thinc.neural.optimizers import Adam
+from thinc.neural.ops import NumpyOps
+
+from ...attrs import NORM
+from ...gold import GoldParse
+from ...vocab import Vocab
+from ...tokens import Doc
+from ...pipeline import NeuralDependencyParser
+
+numpy.random.seed(0)
+
+
+@pytest.fixture
+def vocab():
+    return Vocab(lex_attr_getters={NORM: lambda s: s})
+
+
+@pytest.fixture
+def parser(vocab):
+    parser = NeuralDependencyParser(vocab)
+    parser.cfg['token_vector_width'] = 4
+    parser.cfg['hidden_width'] = 6
+    parser.cfg['hist_size'] = 0
+    parser.add_label('left')
+    parser.begin_training([], **parser.cfg)
+    sgd = Adam(NumpyOps(), 0.001)
+
+    for i in range(30):
+        losses = {}
+        doc = Doc(vocab, words=['a', 'b', 'c', 'd'])
+        gold = GoldParse(doc, heads=[1, 1, 3, 3],
+                         deps=['left', 'ROOT', 'left', 'ROOT'])
+        parser.update([doc], [gold], sgd=sgd, losses=losses)
+    return parser
+
+
+def test_add_label(parser):
+    doc = Doc(parser.vocab, words=['a', 'b', 'c', 'd'])
+    doc = parser(doc)
+    assert doc[0].head.i == 1
+    assert doc[0].dep_ == 'left'
+    assert doc[1].head.i == 1
+    assert doc[2].head.i == 3
+    assert doc[3].head.i == 3
+    parser.add_label('right')
+    doc = Doc(parser.vocab, words=['a', 'b', 'c', 'd'])
+    doc = parser(doc)
+    assert doc[0].head.i == 1
+    assert doc[0].dep_ == 'left'
+    assert doc[1].head.i == 1
+    assert doc[2].head.i == 3
+    assert doc[3].head.i == 3
+    sgd = Adam(NumpyOps(), 0.001)
+    for i in range(10):
+        losses = {}
+        doc = Doc(parser.vocab, words=['a', 'b', 'c', 'd'])
+        gold = GoldParse(doc, heads=[1, 1, 3, 3],
+                         deps=['right', 'ROOT', 'left', 'ROOT'])
+        parser.update([doc], [gold], sgd=sgd, losses=losses)
+    doc = Doc(parser.vocab, words=['a', 'b', 'c', 'd'])
+    doc = parser(doc)
+    assert doc[0].dep_ == 'right'
+    assert doc[2].dep_ == 'left'
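
Note on the resizing step in PATCH 1/3: the output layer keeps its weights in
(nr_out, nr_in) format, so supporting a new parser action only means allocating
a taller weight matrix, copying the trained rows across, and leaving the new
rows at zero until later updates learn them. A minimal numpy sketch of that
idea (the helper name resize_output is hypothetical, not part of the patch):

    import numpy

    def resize_output(W, b, new_nr_out):
        # W has shape (nr_out, nr_in); b has shape (nr_out,).
        W_larger = numpy.zeros((new_nr_out, W.shape[1]), dtype=W.dtype)
        b_larger = numpy.zeros((new_nr_out,), dtype=b.dtype)
        # Copy the trained parameters into the leading rows, mirroring
        # copy_array(larger.W[:smaller.nO], smaller.W) in the patch.
        W_larger[:W.shape[0]] = W
        b_larger[:b.shape[0]] = b
        return W_larger, b_larger

The new rows initially score zero, so the scores of the existing actions are
left untouched; the added label is then learned through further updates, as
test_add_label does after calling parser.add_label('right').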