mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
Merge pull request #1401 from explosion/feature/add-parser-action
💫 Allow labels to be added to pre-trained parser and NER models
This commit is contained in:
commit
d8a2506023
|
@ -743,5 +743,3 @@ def concatenate_lists(*layers, **kwargs): # pragma: no cover
|
||||||
return ys, concatenate_lists_bwd
|
return ys, concatenate_lists_bwd
|
||||||
model = wrap(concatenate_lists_fwd, concat)
|
model = wrap(concatenate_lists_fwd, concat)
|
||||||
return model
|
return model
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -51,7 +51,7 @@ from .._ml import Tok2Vec, doc2feats, rebatch, fine_tune
|
||||||
from .._ml import Residual, drop_layer, flatten
|
from .._ml import Residual, drop_layer, flatten
|
||||||
from .._ml import link_vectors_to_models
|
from .._ml import link_vectors_to_models
|
||||||
from .._ml import HistoryFeatures
|
from .._ml import HistoryFeatures
|
||||||
from ..compat import json_dumps
|
from ..compat import json_dumps, copy_array
|
||||||
|
|
||||||
from . import _parse_features
|
from . import _parse_features
|
||||||
from ._parse_features cimport CONTEXT_SIZE
|
from ._parse_features cimport CONTEXT_SIZE
|
||||||
|
@ -781,12 +781,22 @@ cdef class Parser:
|
||||||
self.moves.finalize_doc(doc)
|
self.moves.finalize_doc(doc)
|
||||||
|
|
||||||
def add_label(self, label):
|
def add_label(self, label):
|
||||||
|
resized = False
|
||||||
for action in self.moves.action_types:
|
for action in self.moves.action_types:
|
||||||
added = self.moves.add_action(action, label)
|
added = self.moves.add_action(action, label)
|
||||||
if added:
|
if added:
|
||||||
# Important that the labels be stored as a list! We need the
|
# Important that the labels be stored as a list! We need the
|
||||||
# order, or the model goes out of synch
|
# order, or the model goes out of synch
|
||||||
self.cfg.setdefault('extra_labels', []).append(label)
|
self.cfg.setdefault('extra_labels', []).append(label)
|
||||||
|
resized = True
|
||||||
|
if self.model not in (True, False, None) and resized:
|
||||||
|
# Weights are stored in (nr_out, nr_in) format, so we're basically
|
||||||
|
# just adding rows here.
|
||||||
|
smaller = self.model[-1]._layers[-1]
|
||||||
|
larger = Affine(self.moves.n_moves, smaller.nI)
|
||||||
|
copy_array(larger.W[:smaller.nO], smaller.W)
|
||||||
|
copy_array(larger.b[:smaller.nO], smaller.b)
|
||||||
|
self.model[-1]._layers[-1] = larger
|
||||||
|
|
||||||
def begin_training(self, gold_tuples, pipeline=None, **cfg):
|
def begin_training(self, gold_tuples, pipeline=None, **cfg):
|
||||||
if 'model' in cfg:
|
if 'model' in cfg:
|
||||||
|
|
68
spacy/tests/parser/test_add_label.py
Normal file
68
spacy/tests/parser/test_add_label.py
Normal file
|
@ -0,0 +1,68 @@
|
||||||
|
'''Test the ability to add a label to a (potentially trained) parsing model.'''
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
import pytest
|
||||||
|
import numpy.random
|
||||||
|
from thinc.neural.optimizers import Adam
|
||||||
|
from thinc.neural.ops import NumpyOps
|
||||||
|
|
||||||
|
from ...attrs import NORM
|
||||||
|
from ...gold import GoldParse
|
||||||
|
from ...vocab import Vocab
|
||||||
|
from ...tokens import Doc
|
||||||
|
from ...pipeline import NeuralDependencyParser
|
||||||
|
|
||||||
|
numpy.random.seed(0)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def vocab():
|
||||||
|
return Vocab(lex_attr_getters={NORM: lambda s: s})
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def parser(vocab):
|
||||||
|
parser = NeuralDependencyParser(vocab)
|
||||||
|
parser.cfg['token_vector_width'] = 4
|
||||||
|
parser.cfg['hidden_width'] = 6
|
||||||
|
parser.cfg['hist_size'] = 0
|
||||||
|
parser.add_label('left')
|
||||||
|
parser.begin_training([], **parser.cfg)
|
||||||
|
sgd = Adam(NumpyOps(), 0.001)
|
||||||
|
|
||||||
|
for i in range(30):
|
||||||
|
losses = {}
|
||||||
|
doc = Doc(vocab, words=['a', 'b', 'c', 'd'])
|
||||||
|
gold = GoldParse(doc, heads=[1, 1, 3, 3],
|
||||||
|
deps=['left', 'ROOT', 'left', 'ROOT'])
|
||||||
|
parser.update([doc], [gold], sgd=sgd, losses=losses)
|
||||||
|
return parser
|
||||||
|
|
||||||
|
|
||||||
|
def test_add_label(parser):
|
||||||
|
doc = Doc(parser.vocab, words=['a', 'b', 'c', 'd'])
|
||||||
|
doc = parser(doc)
|
||||||
|
assert doc[0].head.i == 1
|
||||||
|
assert doc[0].dep_ == 'left'
|
||||||
|
assert doc[1].head.i == 1
|
||||||
|
assert doc[2].head.i == 3
|
||||||
|
assert doc[2].head.i == 3
|
||||||
|
parser.add_label('right')
|
||||||
|
doc = Doc(parser.vocab, words=['a', 'b', 'c', 'd'])
|
||||||
|
doc = parser(doc)
|
||||||
|
assert doc[0].head.i == 1
|
||||||
|
assert doc[0].dep_ == 'left'
|
||||||
|
assert doc[1].head.i == 1
|
||||||
|
assert doc[2].head.i == 3
|
||||||
|
assert doc[2].head.i == 3
|
||||||
|
sgd = Adam(NumpyOps(), 0.001)
|
||||||
|
for i in range(10):
|
||||||
|
losses = {}
|
||||||
|
doc = Doc(parser.vocab, words=['a', 'b', 'c', 'd'])
|
||||||
|
gold = GoldParse(doc, heads=[1, 1, 3, 3],
|
||||||
|
deps=['right', 'ROOT', 'left', 'ROOT'])
|
||||||
|
parser.update([doc], [gold], sgd=sgd, losses=losses)
|
||||||
|
doc = Doc(parser.vocab, words=['a', 'b', 'c', 'd'])
|
||||||
|
doc = parser(doc)
|
||||||
|
assert doc[0].dep_ == 'right'
|
||||||
|
assert doc[2].dep_ == 'left'
|
||||||
|
|
Loading…
Reference in New Issue
Block a user