mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 07:57:35 +03:00 
			
		
		
		
	take care of global vectors in multiprocessing (#5081)
* restore load_nlp.VECTORS in the child process
* add unit test
* fix test
* remove unnecessary import
* add utf8 encoding
* import unicode_literals
This commit is contained in:
		
							parent
							
								
									d078b47c81
								
							
						
					
					
						commit
						d307e9ca58
					
				|  | @ -296,8 +296,7 @@ def link_vectors_to_models(vocab): | ||||||
|     key = (ops.device, vectors.name) |     key = (ops.device, vectors.name) | ||||||
|     if key in thinc.extra.load_nlp.VECTORS: |     if key in thinc.extra.load_nlp.VECTORS: | ||||||
|         if thinc.extra.load_nlp.VECTORS[key].shape != data.shape: |         if thinc.extra.load_nlp.VECTORS[key].shape != data.shape: | ||||||
|             # This is a hack to avoid the problem in #3853. Maybe we should |             # This is a hack to avoid the problem in #3853. | ||||||
|             # print a warning as well? |  | ||||||
|             old_name = vectors.name |             old_name = vectors.name | ||||||
|             new_name = vectors.name + "_%d" % data.shape[0] |             new_name = vectors.name + "_%d" % data.shape[0] | ||||||
|             user_warning(Warnings.W019.format(old=old_name, new=new_name)) |             user_warning(Warnings.W019.format(old=old_name, new=new_name)) | ||||||
|  |  | ||||||
|  | @ -3,6 +3,9 @@ from __future__ import absolute_import, unicode_literals | ||||||
| 
 | 
 | ||||||
| import random | import random | ||||||
| import itertools | import itertools | ||||||
|  | 
 | ||||||
|  | from thinc.extra import load_nlp | ||||||
|  | 
 | ||||||
| from spacy.util import minibatch | from spacy.util import minibatch | ||||||
| import weakref | import weakref | ||||||
| import functools | import functools | ||||||
|  | @ -856,7 +859,7 @@ class Language(object): | ||||||
|         procs = [ |         procs = [ | ||||||
|             mp.Process( |             mp.Process( | ||||||
|                 target=_apply_pipes, |                 target=_apply_pipes, | ||||||
|                 args=(self.make_doc, pipes, rch, sch, Underscore.get_state()), |                 args=(self.make_doc, pipes, rch, sch, Underscore.get_state(), load_nlp.VECTORS), | ||||||
|             ) |             ) | ||||||
|             for rch, sch in zip(texts_q, bytedocs_send_ch) |             for rch, sch in zip(texts_q, bytedocs_send_ch) | ||||||
|         ] |         ] | ||||||
|  | @ -1112,7 +1115,7 @@ def _pipe(docs, proc, kwargs): | ||||||
|         yield doc |         yield doc | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def _apply_pipes(make_doc, pipes, receiver, sender, underscore_state): | def _apply_pipes(make_doc, pipes, receiver, sender, underscore_state, vectors): | ||||||
|     """Worker for Language.pipe |     """Worker for Language.pipe | ||||||
| 
 | 
 | ||||||
|     receiver (multiprocessing.Connection): Pipe to receive text. Usually |     receiver (multiprocessing.Connection): Pipe to receive text. Usually | ||||||
|  | @ -1120,8 +1123,10 @@ def _apply_pipes(make_doc, pipes, receiver, sender, underscore_state): | ||||||
|     sender (multiprocessing.Connection): Pipe to send doc. Usually created by |     sender (multiprocessing.Connection): Pipe to send doc. Usually created by | ||||||
|         `multiprocessing.Pipe()` |         `multiprocessing.Pipe()` | ||||||
|     underscore_state (tuple): The data in the Underscore class of the parent |     underscore_state (tuple): The data in the Underscore class of the parent | ||||||
|  |     vectors (dict): The global vectors data, copied from the parent | ||||||
|     """ |     """ | ||||||
|     Underscore.load_state(underscore_state) |     Underscore.load_state(underscore_state) | ||||||
|  |     load_nlp.VECTORS = vectors | ||||||
|     while True: |     while True: | ||||||
|         texts = receiver.get() |         texts = receiver.get() | ||||||
|         docs = (make_doc(text) for text in texts) |         docs = (make_doc(text) for text in texts) | ||||||
|  |  | ||||||
							
								
								
									
										26
									
								
								spacy/tests/regression/test_issue4725.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										26
									
								
								spacy/tests/regression/test_issue4725.py
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,26 @@ | ||||||
|  | # coding: utf8 | ||||||
|  | from __future__ import unicode_literals | ||||||
|  | 
 | ||||||
|  | import numpy | ||||||
|  | 
 | ||||||
|  | from spacy.lang.en import English | ||||||
|  | from spacy.vocab import Vocab | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | def test_issue4725(): | ||||||
|  |     # ensures that this runs correctly and doesn't hang or crash because of the global vectors | ||||||
|  |     vocab = Vocab(vectors_name="test_vocab_add_vector") | ||||||
|  |     data = numpy.ndarray((5, 3), dtype="f") | ||||||
|  |     data[0] = 1.0 | ||||||
|  |     data[1] = 2.0 | ||||||
|  |     vocab.set_vector("cat", data[0]) | ||||||
|  |     vocab.set_vector("dog", data[1]) | ||||||
|  | 
 | ||||||
|  |     nlp = English(vocab=vocab) | ||||||
|  |     ner = nlp.create_pipe("ner") | ||||||
|  |     nlp.add_pipe(ner) | ||||||
|  |     nlp.begin_training() | ||||||
|  |     docs = ["Kurt is in London."] * 10 | ||||||
|  |     for _ in nlp.pipe(docs, batch_size=2, n_process=2): | ||||||
|  |         pass | ||||||
|  | 
 | ||||||
|  | @ -3,7 +3,6 @@ from __future__ import unicode_literals | ||||||
| 
 | 
 | ||||||
| from spacy.lang.en import English | from spacy.lang.en import English | ||||||
| from spacy.pipeline import EntityRuler | from spacy.pipeline import EntityRuler | ||||||
| from spacy.tokens.underscore import Underscore |  | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def test_issue4849(): | def test_issue4849(): | ||||||
|  |  | ||||||
|  | @ -1,10 +1,8 @@ | ||||||
| # coding: utf8 | # coding: utf8 | ||||||
| from __future__ import unicode_literals | from __future__ import unicode_literals | ||||||
| 
 | 
 | ||||||
| import spacy |  | ||||||
| from spacy.lang.en import English | from spacy.lang.en import English | ||||||
| from spacy.tokens import Span, Doc | from spacy.tokens import Span, Doc | ||||||
| from spacy.tokens.underscore import Underscore |  | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| class CustomPipe: | class CustomPipe: | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user