Merge branch 'develop' of https://github.com/explosion/spaCy into develop

This commit is contained in:
Matthew Honnibal 2017-10-11 08:23:04 +02:00
commit 462b2e26b4
3 changed files with 21 additions and 17 deletions

View File

@@ -114,10 +114,7 @@ def train(cmd, lang, output_dir, train_data, dev_data, n_iter=10, n_sents=0,
util.set_env_log(False)
epoch_model_path = output_path / ('model%d' % i)
nlp.to_disk(epoch_model_path)
nlp_loaded = lang_class(pipeline=pipeline)
for name in pipeline:
nlp_loaded.add_pipe(nlp.create_pipe(name), name=name)
nlp_loaded = nlp_loaded.from_disk(epoch_model_path)
nlp_loaded = util.load_model_from_path(epoch_model_path)
dev_docs = list(corpus.dev_docs(
nlp_loaded,
gold_preproc=gold_preproc))
@@ -131,11 +128,7 @@ def train(cmd, lang, output_dir, train_data, dev_data, n_iter=10, n_sents=0,
else:
gpu_wps = nwords/(end_time-start_time)
with Model.use_device('cpu'):
nlp_loaded = lang_class(pipeline=pipeline)
for name in pipeline:
nlp_loaded.add_pipe(nlp.create_pipe(name), name=name)
nlp_loaded = nlp_loaded.from_disk(epoch_model_path)
nlp_loaded = util.load_model_from_path(epoch_model_path)
dev_docs = list(corpus.dev_docs(
nlp_loaded, gold_preproc=gold_preproc))
start_time = timer()

View File

@@ -800,6 +800,15 @@ cdef class Parser:
if self.model not in (True, False, None) and resized:
# Weights are stored in (nr_out, nr_in) format, so we're basically
# just adding rows here.
if self.model[-1].is_noop:
smaller = self.model[1]
dims = dict(self.model[1]._dims)
dims['nO'] = self.moves.n_moves
larger = self.model[1].__class__(**dims)
copy_array(larger.W[:, :smaller.nO], smaller.W)
copy_array(larger.b[:smaller.nO], smaller.b)
self.model = (self.model[0], larger, self.model[2])
else:
smaller = self.model[-1]._layers[-1]
larger = Affine(self.moves.n_moves, smaller.nI)
copy_array(larger.W[:smaller.nO], smaller.W)

View File

@@ -22,14 +22,14 @@ def vocab():
@pytest.fixture
def parser(vocab):
parser = NeuralDependencyParser(vocab)
parser.cfg['token_vector_width'] = 4
parser.cfg['hidden_width'] = 6
parser.cfg['token_vector_width'] = 8
parser.cfg['hidden_width'] = 30
parser.cfg['hist_size'] = 0
parser.add_label('left')
parser.begin_training([], **parser.cfg)
sgd = Adam(NumpyOps(), 0.001)
for i in range(30):
for i in range(10):
losses = {}
doc = Doc(vocab, words=['a', 'b', 'c', 'd'])
gold = GoldParse(doc, heads=[1, 1, 3, 3],
@@ -37,6 +37,8 @@ def parser(vocab):
parser.update([doc], [gold], sgd=sgd, losses=losses)
return parser
def test_init_parser(parser):
pass
def test_add_label(parser):
doc = Doc(parser.vocab, words=['a', 'b', 'c', 'd'])