Fixing pickling of the parser (#5218)

* fix __reduce__ for pickling parser

* setting the move object as 'state' during pickling

* unskip test_issue4725 - works again
This commit is contained in:
Sofie Van Landeghem 2020-03-27 19:35:26 +01:00 committed by GitHub
parent a0858ae761
commit 9b412516e7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 22 additions and 6 deletions

View File

@ -1173,7 +1173,13 @@ cdef class DependencyParser(Parser):
tok2vec=tok2vec, sgd=sgd) tok2vec=tok2vec, sgd=sgd)
def __reduce__(self): def __reduce__(self):
return (DependencyParser, (self.vocab, self.moves, self.model), None, None) return (DependencyParser, (self.vocab, self.model), self.moves)
def __getstate__(self):
return self.moves
def __setstate__(self, moves):
self.moves = moves
@property @property
def labels(self): def labels(self):
@ -1214,8 +1220,13 @@ cdef class EntityRecognizer(Parser):
tok2vec=tok2vec) tok2vec=tok2vec)
def __reduce__(self): def __reduce__(self):
return (EntityRecognizer, (self.vocab, self.moves, self.model), return (EntityRecognizer, (self.vocab, self.model), self.moves)
None, None)
def __getstate__(self):
return self.moves
def __setstate__(self, moves):
self.moves = moves
@property @property
def labels(self): def labels(self):

View File

@ -79,7 +79,13 @@ cdef class Parser:
return cls(nlp.vocab, model, **cfg) return cls(nlp.vocab, model, **cfg)
def __reduce__(self): def __reduce__(self):
return (Parser, (self.vocab, self.moves, self.model), None, None) return (Parser, (self.vocab, self.model), self.moves)
def __getstate__(self):
return self.moves
def __setstate__(self, moves):
self.moves = moves
@property @property
def move_names(self): def move_names(self):

View File

@ -5,7 +5,6 @@ from spacy.lang.en import English
from spacy.vocab import Vocab from spacy.vocab import Vocab
@pytest.mark.skip(reason="currently hangs")
def test_issue4725(): def test_issue4725():
# ensures that this runs correctly and doesn't hang or crash because of the global vectors # ensures that this runs correctly and doesn't hang or crash because of the global vectors
vocab = Vocab(vectors_name="test_vocab_add_vector") vocab = Vocab(vectors_name="test_vocab_add_vector")

View File

@ -131,7 +131,7 @@ shared vocab it depends on.
If you need to pickle multiple objects, try to pickle them **together** instead If you need to pickle multiple objects, try to pickle them **together** instead
of separately. For instance, instead of pickling all pipeline components, pickle of separately. For instance, instead of pickling all pipeline components, pickle
the entire pipeline once. And instead of pickling several `Doc` objects the entire pipeline once. And instead of pickling several `Doc` objects
separately, pickle a list of `Doc` objects. Since the all share a reference to separately, pickle a list of `Doc` objects. Since they all share a reference to
the _same_ `Vocab` object, it will only be included once. the _same_ `Vocab` object, it will only be included once.
```python ```python