diff --git a/spacy/_ml.py b/spacy/_ml.py index 8695a88cc..fb7d39255 100644 --- a/spacy/_ml.py +++ b/spacy/_ml.py @@ -296,8 +296,7 @@ def link_vectors_to_models(vocab): key = (ops.device, vectors.name) if key in thinc.extra.load_nlp.VECTORS: if thinc.extra.load_nlp.VECTORS[key].shape != data.shape: - # This is a hack to avoid the problem in #3853. Maybe we should - # print a warning as well? + # This is a hack to avoid the problem in #3853. old_name = vectors.name new_name = vectors.name + "_%d" % data.shape[0] user_warning(Warnings.W019.format(old=old_name, new=new_name)) diff --git a/spacy/language.py b/spacy/language.py index 16aa4967e..28fddfebb 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -3,6 +3,9 @@ from __future__ import absolute_import, unicode_literals import random import itertools + +from thinc.extra import load_nlp + from spacy.util import minibatch import weakref import functools @@ -856,7 +859,7 @@ class Language(object): procs = [ mp.Process( target=_apply_pipes, - args=(self.make_doc, pipes, rch, sch, Underscore.get_state()), + args=(self.make_doc, pipes, rch, sch, Underscore.get_state(), load_nlp.VECTORS), ) for rch, sch in zip(texts_q, bytedocs_send_ch) ] @@ -1112,7 +1115,7 @@ def _pipe(docs, proc, kwargs): yield doc -def _apply_pipes(make_doc, pipes, receiver, sender, underscore_state): +def _apply_pipes(make_doc, pipes, receiver, sender, underscore_state, vectors): """Worker for Language.pipe receiver (multiprocessing.Connection): Pipe to receive text. Usually @@ -1120,8 +1123,10 @@ def _apply_pipes(make_doc, pipes, receiver, sender, underscore_state): sender (multiprocessing.Connection): Pipe to send doc. Usually created by `multiprocessing.Pipe()` underscore_state (tuple): The data in the Underscore class of the parent + vectors (dict): The global vectors data, copied from the parent """ Underscore.load_state(underscore_state) + load_nlp.VECTORS = vectors while True: texts = receiver.get() docs = (make_doc(text) for text in texts) diff --git a/spacy/tests/regression/test_issue4725.py b/spacy/tests/regression/test_issue4725.py new file mode 100644 index 000000000..f80f19852 --- /dev/null +++ b/spacy/tests/regression/test_issue4725.py @@ -0,0 +1,26 @@ +# coding: utf8 +from __future__ import unicode_literals + +import numpy + +from spacy.lang.en import English +from spacy.vocab import Vocab + + +def test_issue4725(): + # ensures that this runs correctly and doesn't hang or crash because of the global vectors + vocab = Vocab(vectors_name="test_vocab_add_vector") + data = numpy.ndarray((5, 3), dtype="f") + data[0] = 1.0 + data[1] = 2.0 + vocab.set_vector("cat", data[0]) + vocab.set_vector("dog", data[1]) + + nlp = English(vocab=vocab) + ner = nlp.create_pipe("ner") + nlp.add_pipe(ner) + nlp.begin_training() + docs = ["Kurt is in London."] * 10 + for _ in nlp.pipe(docs, batch_size=2, n_process=2): + pass + diff --git a/spacy/tests/regression/test_issue4849.py b/spacy/tests/regression/test_issue4849.py index 85d03fe9a..834219773 100644 --- a/spacy/tests/regression/test_issue4849.py +++ b/spacy/tests/regression/test_issue4849.py @@ -3,7 +3,6 @@ from __future__ import unicode_literals from spacy.lang.en import English from spacy.pipeline import EntityRuler -from spacy.tokens.underscore import Underscore def test_issue4849(): diff --git a/spacy/tests/regression/test_issue4903.py b/spacy/tests/regression/test_issue4903.py index 9a3c10d61..d467b1cd6 100644 --- a/spacy/tests/regression/test_issue4903.py +++ b/spacy/tests/regression/test_issue4903.py @@ -1,10 +1,8 @@ # coding: utf8 from __future__ import unicode_literals -import spacy from spacy.lang.en import English from spacy.tokens import Span, Doc -from spacy.tokens.underscore import Underscore class CustomPipe: