mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-24 17:06:29 +03:00
load Underscore state when multiprocessing
This commit is contained in:
parent
99a543367d
commit
ecbb9c4b9f
|
@ -15,6 +15,7 @@ import multiprocessing as mp
|
|||
from itertools import chain, cycle
|
||||
|
||||
from .tokenizer import Tokenizer
|
||||
from .tokens.underscore import Underscore
|
||||
from .vocab import Vocab
|
||||
from .lemmatizer import Lemmatizer
|
||||
from .lookups import Lookups
|
||||
|
@ -852,7 +853,10 @@ class Language(object):
|
|||
sender.send()
|
||||
|
||||
procs = [
|
||||
mp.Process(target=_apply_pipes, args=(self.make_doc, pipes, rch, sch))
|
||||
mp.Process(
|
||||
target=_apply_pipes,
|
||||
args=(self.make_doc, pipes, rch, sch, Underscore.get_state()),
|
||||
)
|
||||
for rch, sch in zip(texts_q, bytedocs_send_ch)
|
||||
]
|
||||
for proc in procs:
|
||||
|
@ -1107,7 +1111,7 @@ def _pipe(docs, proc, kwargs):
|
|||
yield doc
|
||||
|
||||
|
||||
def _apply_pipes(make_doc, pipes, reciever, sender):
|
||||
def _apply_pipes(make_doc, pipes, receiver, sender, underscore_state):
|
||||
"""Worker for Language.pipe
|
||||
|
||||
receiver (multiprocessing.Connection): Pipe to receive text. Usually
|
||||
|
@ -1115,8 +1119,9 @@ def _apply_pipes(make_doc, pipes, reciever, sender):
|
|||
sender (multiprocessing.Connection): Pipe to send doc. Usually created by
|
||||
`multiprocessing.Pipe()`
|
||||
"""
|
||||
Underscore.load_state(underscore_state)
|
||||
while True:
|
||||
texts = reciever.get()
|
||||
texts = receiver.get()
|
||||
docs = (make_doc(text) for text in texts)
|
||||
for pipe in pipes:
|
||||
docs = pipe(docs)
|
||||
|
|
|
@ -79,6 +79,14 @@ class Underscore(object):
|
|||
def _get_key(self, name):
|
||||
return ("._.", name, self._start, self._end)
|
||||
|
||||
@classmethod
|
||||
def get_state(cls):
|
||||
return cls.token_extensions, cls.span_extensions, cls.doc_extensions
|
||||
|
||||
@classmethod
|
||||
def load_state(cls, state):
|
||||
cls.token_extensions, cls.span_extensions, cls.doc_extensions = state
|
||||
|
||||
|
||||
def get_ext_args(**kwargs):
|
||||
"""Validate and convert arguments. Reused in Doc, Token and Span."""
|
||||
|
|
Loading…
Reference in New Issue
Block a user