diff --git a/spacy/tests/serialize/test_serialize_vocab_strings.py b/spacy/tests/serialize/test_serialize_vocab_strings.py
index 3cfcb5974..325245a25 100644
--- a/spacy/tests/serialize/test_serialize_vocab_strings.py
+++ b/spacy/tests/serialize/test_serialize_vocab_strings.py
@@ -192,8 +192,10 @@ def test_pickle_vocab(strings, lex_attr):
     ops = get_current_ops()
     vectors = Vectors(data=ops.xp.zeros((10, 10)), mode="floret", hash_count=1)
     vocab.vectors = vectors
+    vocab.lex_attr_data = {"a": 1}
     vocab[strings[0]].norm_ = lex_attr
     vocab_pickled = pickle.dumps(vocab)
     vocab_unpickled = pickle.loads(vocab_pickled)
     assert vocab.to_bytes() == vocab_unpickled.to_bytes()
     assert vocab_unpickled.vectors.mode == "floret"
+    assert vocab_unpickled.lex_attr_data == vocab.lex_attr_data
diff --git a/spacy/vocab.pyx b/spacy/vocab.pyx
index 215d0ab95..d62cf71e3 100644
--- a/spacy/vocab.pyx
+++ b/spacy/vocab.pyx
@@ -556,18 +556,20 @@ def pickle_vocab(vocab):
     vectors = vocab.vectors
     morph = vocab.morphology
     lex_attr_getters = srsly.pickle_dumps(vocab.lex_attr_getters)
+    lex_attr_data = vocab.lex_attr_data
     lookups = vocab.lookups
     get_noun_chunks = vocab.get_noun_chunks
     return (unpickle_vocab,
-            (sstore, vectors, morph, lex_attr_getters, lookups, get_noun_chunks))
+            (sstore, vectors, morph, lex_attr_getters, lex_attr_data, lookups, get_noun_chunks))
 
 
-def unpickle_vocab(sstore, vectors, morphology, lex_attr_getters, lookups, get_noun_chunks):
+def unpickle_vocab(sstore, vectors, morphology, lex_attr_getters, lex_attr_data, lookups, get_noun_chunks):
     cdef Vocab vocab = Vocab()
     vocab.vectors = vectors
     vocab.strings = sstore
     vocab.morphology = morphology
     vocab.lex_attr_getters = srsly.pickle_loads(lex_attr_getters)
+    vocab.lex_attr_data = lex_attr_data
     vocab.lookups = lookups
     vocab.get_noun_chunks = get_noun_chunks
     return vocab