Fixing pickling of the parser (#5218)

* fix __reduce__ for pickling parser

* set the parser's `moves` object as the pickle 'state' (third element of the `__reduce__` tuple)

* unskip test_issue4725 - works again
This commit is contained in:
Sofie Van Landeghem 2020-03-27 19:35:26 +01:00 committed by GitHub
parent a0858ae761
commit 9b412516e7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 22 additions and 6 deletions

View File

@ -1173,7 +1173,13 @@ cdef class DependencyParser(Parser):
tok2vec=tok2vec, sgd=sgd)
def __reduce__(self):
return (DependencyParser, (self.vocab, self.moves, self.model), None, None)
return (DependencyParser, (self.vocab, self.model), self.moves)
def __getstate__(self):
return self.moves
def __setstate__(self, moves):
self.moves = moves
@property
def labels(self):
@ -1214,8 +1220,13 @@ cdef class EntityRecognizer(Parser):
tok2vec=tok2vec)
def __reduce__(self):
return (EntityRecognizer, (self.vocab, self.moves, self.model),
None, None)
return (EntityRecognizer, (self.vocab, self.model), self.moves)
def __getstate__(self):
return self.moves
def __setstate__(self, moves):
self.moves = moves
@property
def labels(self):

View File

@ -79,7 +79,13 @@ cdef class Parser:
return cls(nlp.vocab, model, **cfg)
def __reduce__(self):
return (Parser, (self.vocab, self.moves, self.model), None, None)
return (Parser, (self.vocab, self.model), self.moves)
def __getstate__(self):
return self.moves
def __setstate__(self, moves):
self.moves = moves
@property
def move_names(self):

View File

@ -5,7 +5,6 @@ from spacy.lang.en import English
from spacy.vocab import Vocab
@pytest.mark.skip(reason="currently hangs")
def test_issue4725():
# ensures that this runs correctly and doesn't hang or crash because of the global vectors
vocab = Vocab(vectors_name="test_vocab_add_vector")

View File

@ -131,7 +131,7 @@ shared vocab it depends on.
If you need to pickle multiple objects, try to pickle them **together** instead
of separately. For instance, instead of pickling all pipeline components, pickle
the entire pipeline once. And instead of pickling several `Doc` objects
separately, pickle a list of `Doc` objects. Since the all share a reference to
separately, pickle a list of `Doc` objects. Since they all share a reference to
the _same_ `Vocab` object, it will only be included once.
```python