From 5ca57bd859a0ea57108baab454ac5bb073e62afb Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Mon, 12 Oct 2015 15:27:47 +1100
Subject: [PATCH] * Ensure Morphology can be pickled, to address Issue #125.

---
Review note: tests/morphology/test_pickle.py uses io.BytesIO rather than the
Python-2-only StringIO module, so the test can be imported on Python 3 as
well. pickle.dump() writes bytes, so a binary buffer is the right target on
both major versions. The post-image blob id on that file's "index" line has
not been regenerated for this edit.

 spacy/morphology.pxd            |  1 +
 spacy/morphology.pyx            |  4 ++++
 tests/morphology/test_pickle.py | 20 ++++++++++++++++++++
 3 files changed, 25 insertions(+)
 create mode 100644 tests/morphology/test_pickle.py

diff --git a/spacy/morphology.pxd b/spacy/morphology.pxd
index 62d3fccc1..847626158 100644
--- a/spacy/morphology.pxd
+++ b/spacy/morphology.pxd
@@ -25,6 +25,7 @@ cdef class Morphology:
     cdef readonly Pool mem
     cdef readonly StringStore strings
     cdef public object lemmatizer
+    cdef readonly object tag_map
     cdef public object n_tags
     cdef public object reverse_index
     cdef public object tag_names
diff --git a/spacy/morphology.pyx b/spacy/morphology.pyx
index c53e5f478..e8b1f3520 100644
--- a/spacy/morphology.pyx
+++ b/spacy/morphology.pyx
@@ -14,6 +14,7 @@ cdef class Morphology:
     def __init__(self, StringStore string_store, tag_map, lemmatizer):
         self.mem = Pool()
         self.strings = string_store
+        self.tag_map = tag_map
         self.lemmatizer = lemmatizer
         self.n_tags = len(tag_map) + 1
         self.tag_names = tuple(sorted(tag_map.keys()))
@@ -28,6 +29,9 @@ cdef class Morphology:
             self.reverse_index[self.rich_tags[i].name] = i
         self._cache = PreshMapArray(self.n_tags)
 
+    def __reduce__(self):
+        return (Morphology, (self.strings, self.tag_map, self.lemmatizer), None, None)
+
     cdef int assign_tag(self, TokenC* token, tag) except -1:
         cdef int tag_id
         if isinstance(tag, basestring):
diff --git a/tests/morphology/test_pickle.py b/tests/morphology/test_pickle.py
new file mode 100644
index 000000000..f1b5bcd4c
--- /dev/null
+++ b/tests/morphology/test_pickle.py
@@ -0,0 +1,20 @@
+import pytest
+
+import pickle
+import io
+
+
+from spacy.morphology import Morphology
+from spacy.lemmatizer import Lemmatizer
+from spacy.strings import StringStore
+
+
+def test_pickle():
+    """Check that a Morphology instance can be dumped with pickle."""
+    morphology = Morphology(StringStore(), {}, Lemmatizer({}, {}, {}))
+
+    # pickle emits bytes, so write into a binary buffer; io.BytesIO exists on
+    # both Python 2 and 3, unlike the Python-2-only StringIO module.
+    file_ = io.BytesIO()
+    pickle.dump(morphology, file_)
+