Merge branch 'develop' of https://github.com/explosion/spaCy into develop

2025-12-19 16:14:39 +03:00 · 2017-10-11 08:23:04 +02:00 · 2017-10-11 08:23:04 +02:00 · 462b2e26b4
commit 462b2e26b4
parent 3b527fa52b d84136b4a9
3 changed files with 21 additions and 17 deletions
--- a/spacy/cli/train.py
+++ b/spacy/cli/train.py
@@ -114,10 +114,7 @@ def train(cmd, lang, output_dir, train_data, dev_data, n_iter=10, n_sents=0,
                util.set_env_log(False)
                epoch_model_path = output_path / ('model%d' % i)
                nlp.to_disk(epoch_model_path)
-                nlp_loaded = lang_class(pipeline=pipeline)
-                for name in pipeline:
-                    nlp_loaded.add_pipe(nlp.create_pipe(name), name=name)
-                nlp_loaded = nlp_loaded.from_disk(epoch_model_path)
+                nlp_loaded = util.load_model_from_path(epoch_model_path)
                dev_docs = list(corpus.dev_docs(
                                nlp_loaded,
                                gold_preproc=gold_preproc))
@@ -131,11 +128,7 @@ def train(cmd, lang, output_dir, train_data, dev_data, n_iter=10, n_sents=0,
                else:
                    gpu_wps = nwords/(end_time-start_time)
                    with Model.use_device('cpu'):
-                        nlp_loaded = lang_class(pipeline=pipeline)
-                        for name in pipeline:
-                            nlp_loaded.add_pipe(nlp.create_pipe(name), name=name)
-
-                        nlp_loaded = nlp_loaded.from_disk(epoch_model_path)
+                        nlp_loaded = util.load_model_from_path(epoch_model_path)
                        dev_docs = list(corpus.dev_docs(
                                        nlp_loaded, gold_preproc=gold_preproc))
                        start_time = timer()
--- a/spacy/syntax/nn_parser.pyx
+++ b/spacy/syntax/nn_parser.pyx
@@ -800,6 +800,15 @@ cdef class Parser:
        if self.model not in (True, False, None) and resized:
            # Weights are stored in (nr_out, nr_in) format, so we're basically
            # just adding rows here.
+            if self.model[-1].is_noop:
+                smaller = self.model[1]
+                dims = dict(self.model[1]._dims)
+                dims['nO'] = self.moves.n_moves
+                larger = self.model[1].__class__(**dims)
+                copy_array(larger.W[:, :smaller.nO], smaller.W)
+                copy_array(larger.b[:smaller.nO], smaller.b)
+                self.model = (self.model[0], larger, self.model[2])
+            else:
                smaller = self.model[-1]._layers[-1]
                larger = Affine(self.moves.n_moves, smaller.nI)
                copy_array(larger.W[:smaller.nO], smaller.W)
--- a/spacy/tests/parser/test_add_label.py
+++ b/spacy/tests/parser/test_add_label.py
@@ -22,14 +22,14 @@ def vocab():
@pytest.fixture
 def parser(vocab):
    parser = NeuralDependencyParser(vocab)
-    parser.cfg['token_vector_width'] = 4
-    parser.cfg['hidden_width'] = 6
+    parser.cfg['token_vector_width'] = 8
+    parser.cfg['hidden_width'] = 30
    parser.cfg['hist_size'] = 0
    parser.add_label('left')
    parser.begin_training([], **parser.cfg)
    sgd = Adam(NumpyOps(), 0.001)

-    for i in range(30):
+    for i in range(10):
        losses = {}
        doc = Doc(vocab, words=['a', 'b', 'c', 'd'])
        gold = GoldParse(doc, heads=[1, 1, 3, 3],
@@ -37,6 +37,8 @@ def parser(vocab):
        parser.update([doc], [gold], sgd=sgd, losses=losses)
    return parser

+def test_init_parser(parser):
+    pass

 def test_add_label(parser):
    doc = Doc(parser.vocab, words=['a', 'b', 'c', 'd'])