Merge pull request #1401 from explosion/feature/add-parser-action

💫 Allow labels to be added to pre-trained parser and NER models

commit d8a2506023
@@ -743,5 +743,3 @@ def concatenate_lists(*layers, **kwargs):  # pragma: no cover
         return ys, concatenate_lists_bwd
     model = wrap(concatenate_lists_fwd, concat)
     return model
-
-
@@ -51,7 +51,7 @@ from .._ml import Tok2Vec, doc2feats, rebatch, fine_tune
 from .._ml import Residual, drop_layer, flatten
 from .._ml import link_vectors_to_models
 from .._ml import HistoryFeatures
-from ..compat import json_dumps
+from ..compat import json_dumps, copy_array
 
 from . import _parse_features
 from ._parse_features cimport CONTEXT_SIZE
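The only change in this hunk is pulling copy_array in from ..compat; it is used further down to write the old layer's weights into the enlarged one in place, presumably so the same call works for both numpy and cupy arrays. A rough stand-alone illustration of that in-place copy (the helper here is a stand-in, not the actual spacy.compat implementation):

import numpy

def copy_array(dst, src):
    # Stand-in for the compat helper: write src into dst in place, so a view
    # into an existing parameter array can be filled without reallocating it.
    dst[:] = src

b_small = numpy.asarray([0.1, 0.2], dtype='f')
b_large = numpy.zeros(4, dtype='f')
copy_array(b_large[:2], b_small)    # fills the first two entries in place
assert numpy.array_equal(b_large[:2], b_small)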
@@ -781,12 +781,22 @@ cdef class Parser:
         self.moves.finalize_doc(doc)
 
+    def add_label(self, label):
+        resized = False
+        for action in self.moves.action_types:
+            added = self.moves.add_action(action, label)
+            if added:
+                # Important that the labels be stored as a list! We need the
+                # order, or the model goes out of synch
+                self.cfg.setdefault('extra_labels', []).append(label)
+                resized = True
+        if self.model not in (True, False, None) and resized:
+            # Weights are stored in (nr_out, nr_in) format, so we're basically
+            # just adding rows here.
+            smaller = self.model[-1]._layers[-1]
+            larger = Affine(self.moves.n_moves, smaller.nI)
+            copy_array(larger.W[:smaller.nO], smaller.W)
+            copy_array(larger.b[:smaller.nO], smaller.b)
+            self.model[-1]._layers[-1] = larger
+
     def begin_training(self, gold_tuples, pipeline=None, **cfg):
         if 'model' in cfg:
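As the comment in the diff says, the weights of the final layer are stored as an (nr_out, nr_in) matrix, so supporting extra actions only means adding rows: the old rows and bias entries are copied across, which leaves the scores for the existing actions untouched. A small numpy sketch of that idea outside of thinc (grow_output_layer is an illustrative name, not part of spaCy):

import numpy

def grow_output_layer(W, b, new_nr_out):
    # W has shape (nr_out, nr_in); b has shape (nr_out,).
    # Allocate a larger output layer and copy the old rows into it,
    # so scores for the existing classes are preserved exactly.
    nr_out, nr_in = W.shape
    W_new = numpy.zeros((new_nr_out, nr_in), dtype=W.dtype)
    b_new = numpy.zeros((new_nr_out,), dtype=b.dtype)
    W_new[:nr_out] = W
    b_new[:nr_out] = b
    return W_new, b_new

W = numpy.random.uniform(-1, 1, (4, 8)).astype('f')   # 4 actions, 8 inputs
b = numpy.random.uniform(-1, 1, (4,)).astype('f')
W_big, b_big = grow_output_layer(W, b, 6)             # room for 6 actions
x = numpy.random.uniform(-1, 1, (8,)).astype('f')
old_scores = W @ x + b
new_scores = W_big @ x + b_big
assert numpy.allclose(new_scores[:4], old_scores)     # old actions unchanged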
spacy/tests/parser/test_add_label.py (new file, 68 lines)
@@ -0,0 +1,68 @@
'''Test the ability to add a label to a (potentially trained) parsing model.'''
from __future__ import unicode_literals
import pytest
import numpy.random
from thinc.neural.optimizers import Adam
from thinc.neural.ops import NumpyOps

from ...attrs import NORM
from ...gold import GoldParse
from ...vocab import Vocab
from ...tokens import Doc
from ...pipeline import NeuralDependencyParser

numpy.random.seed(0)


@pytest.fixture
def vocab():
    return Vocab(lex_attr_getters={NORM: lambda s: s})


@pytest.fixture
def parser(vocab):
    parser = NeuralDependencyParser(vocab)
    parser.cfg['token_vector_width'] = 4
    parser.cfg['hidden_width'] = 6
    parser.cfg['hist_size'] = 0
    parser.add_label('left')
    parser.begin_training([], **parser.cfg)
    sgd = Adam(NumpyOps(), 0.001)

    for i in range(30):
        losses = {}
        doc = Doc(vocab, words=['a', 'b', 'c', 'd'])
        gold = GoldParse(doc, heads=[1, 1, 3, 3],
                         deps=['left', 'ROOT', 'left', 'ROOT'])
        parser.update([doc], [gold], sgd=sgd, losses=losses)
    return parser


def test_add_label(parser):
    doc = Doc(parser.vocab, words=['a', 'b', 'c', 'd'])
    doc = parser(doc)
    assert doc[0].head.i == 1
    assert doc[0].dep_ == 'left'
    assert doc[1].head.i == 1
    assert doc[2].head.i == 3
    assert doc[2].head.i == 3
    parser.add_label('right')
    doc = Doc(parser.vocab, words=['a', 'b', 'c', 'd'])
    doc = parser(doc)
    assert doc[0].head.i == 1
    assert doc[0].dep_ == 'left'
    assert doc[1].head.i == 1
    assert doc[2].head.i == 3
    assert doc[2].head.i == 3
    sgd = Adam(NumpyOps(), 0.001)
    for i in range(10):
        losses = {}
        doc = Doc(parser.vocab, words=['a', 'b', 'c', 'd'])
        gold = GoldParse(doc, heads=[1, 1, 3, 3],
                         deps=['right', 'ROOT', 'left', 'ROOT'])
        parser.update([doc], [gold], sgd=sgd, losses=losses)
    doc = Doc(parser.vocab, words=['a', 'b', 'c', 'd'])
    doc = parser(doc)
    assert doc[0].dep_ == 'right'
    assert doc[2].dep_ == 'left'
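The new test only exercises the dependency parser, but the commit title also covers the NER model, which shares the same Parser base class. A sketch of the equivalent check for NER, untested and assuming the same era API (NeuralEntityRecognizer, GoldParse(..., entities=...) with BILUO tags):

from spacy.vocab import Vocab
from spacy.tokens import Doc
from spacy.gold import GoldParse
from spacy.pipeline import NeuralEntityRecognizer
from thinc.neural.optimizers import Adam
from thinc.neural.ops import NumpyOps

ner = NeuralEntityRecognizer(Vocab())
ner.add_label('PERSON')
ner.begin_training([])
sgd = Adam(NumpyOps(), 0.001)
for i in range(10):
    doc = Doc(ner.vocab, words=['a', 'b', 'c', 'd'])
    gold = GoldParse(doc, entities=['U-PERSON', 'O', 'O', 'O'])
    ner.update([doc], [gold], sgd=sgd, losses={})

# Adding a label after training should resize the output layer in place,
# mirroring what test_add_label checks for the parser.
ner.add_label('EVENT')
doc = ner(Doc(ner.vocab, words=['a', 'b', 'c', 'd']))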