mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 17:24:41 +03:00
* Allow StringStore to be pickled, to start addressing Issue #125
This commit is contained in:
parent
41012907a8
commit
0cee928467
|
@ -69,12 +69,15 @@ cdef Utf8Str _allocate(Pool mem, const unsigned char* chars, int length) except
|
|||
|
||||
cdef class StringStore:
|
||||
'''Map strings to and from integer IDs.'''
|
||||
def __init__(self):
|
||||
def __init__(self, strings=None):
    '''Set up an empty store, optionally pre-populated from an iterable.

    strings: optional iterable of strings. When given, every entry is
        looked up through self[...] in iteration order. __reduce__ passes
        the stored strings back through this argument when unpickling.
    '''
    self._resize_at = 10000
    self.mem = Pool()
    self._map = PreshMap()
    # Reserve room for _resize_at entries up front.
    self.c = <Utf8Str*>self.mem.alloc(self._resize_at, sizeof(Utf8Str))
    # The counter starts at 1; intern() notes that 0 means "missing".
    self.size = 1
    if strings is None:
        return
    for text in strings:
        # Only the side effect of the lookup matters here (presumably it
        # interns the string -- confirm against __getitem__).
        _ = self[text]
|
||||
|
||||
property size:
|
||||
def __get__(self):
|
||||
|
@ -113,6 +116,14 @@ cdef class StringStore:
|
|||
for i in range(self.size):
|
||||
yield self[i]
|
||||
|
||||
def __reduce__(self):
    '''Pickle support: describe how to rebuild this store.

    Returns the tuple pickle expects from __reduce__: the StringStore
    callable, followed by a one-element argument tuple holding the list
    of stored strings. The remaining three slots are unused (None).
    '''
    # Position 0 gets an empty placeholder; entries 1..size-1 are decoded
    # from the underlying Utf8Str array in index order.
    strings = [""] + [_decode(&self.c[idx]) for idx in range(1, self.size)]
    return (StringStore, (strings,), None, None, None)
|
||||
|
||||
cdef const Utf8Str* intern(self, unsigned char* chars, int length) except NULL:
|
||||
# 0 means missing, but we don't bother offsetting the index.
|
||||
key = hash64(chars, length * sizeof(char), 0)
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
# -*- coding: utf8 -*-
|
||||
from __future__ import unicode_literals
|
||||
import pickle
|
||||
import StringIO
|
||||
|
||||
from spacy.strings import StringStore
|
||||
|
||||
|
@ -76,3 +78,18 @@ def test_massive_strings(sstore):
|
|||
s513 = '1' * 513
|
||||
orth = sstore[s513]
|
||||
assert sstore[orth] == s513
|
||||
|
||||
|
||||
def test_pickle_string_store(sstore):
    '''A pickled-and-restored store maps the same ID back to the same string.'''
    hello_id = sstore[u'Hi']
    # Pickle output is binary, so round-trip through an in-memory byte
    # string instead of a text buffer (the StringIO module is also
    # unavailable on Python 3).
    payload = pickle.dumps(sstore)
    loaded = pickle.loads(payload)
    assert loaded[hello_id] == u'Hi'
|
||||
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user