mirror of
https://github.com/explosion/spaCy.git
synced 2025-07-14 18:22:27 +03:00
* Allow StringStore to be pickled, to start addressing Issue #125
This commit is contained in:
parent
41012907a8
commit
0cee928467
|
@ -69,12 +69,15 @@ cdef Utf8Str _allocate(Pool mem, const unsigned char* chars, int length) except
|
||||||
|
|
||||||
cdef class StringStore:
|
cdef class StringStore:
|
||||||
'''Map strings to and from integer IDs.'''
|
'''Map strings to and from integer IDs.'''
|
||||||
def __init__(self, strings=None):
    '''Create a store, optionally pre-populated from an iterable of strings.

    Each entry of `strings` is looked up through `self[...]` in iteration
    order. `strings=None` (the default) performs no lookups.
    '''
    self._resize_at = 10000
    self.mem = Pool()
    self._map = PreshMap()
    self.c = <Utf8Str*>self.mem.alloc(self._resize_at, sizeof(Utf8Str))
    # Start at 1: per the note in intern(), 0 means "missing".
    self.size = 1
    if strings is None:
        return
    for text in strings:
        # The lookup is done for its effect on the store; the result is unused.
        _ = self[text]
|
||||||
|
|
||||||
property size:
|
property size:
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
|
@ -113,6 +116,14 @@ cdef class StringStore:
|
||||||
for i in range(self.size):
|
for i in range(self.size):
|
||||||
yield self[i]
|
yield self[i]
|
||||||
|
|
||||||
|
def __reduce__(self):
    '''Pickle support: describe this store as StringStore(list_of_strings).

    The list holds "" at position 0, followed by the decoded entries at
    positions 1 .. size-1 of the internal array, in order.
    '''
    strings = [""]
    for idx in range(1, self.size):
        strings.append(_decode(&self.c[idx]))
    return (StringStore, (strings,), None, None, None)
|
||||||
|
|
||||||
cdef const Utf8Str* intern(self, unsigned char* chars, int length) except NULL:
|
cdef const Utf8Str* intern(self, unsigned char* chars, int length) except NULL:
|
||||||
# 0 means missing, but we don't bother offsetting the index.
|
# 0 means missing, but we don't bother offsetting the index.
|
||||||
key = hash64(chars, length * sizeof(char), 0)
|
key = hash64(chars, length * sizeof(char), 0)
|
||||||
|
|
|
@ -1,5 +1,7 @@
|
||||||
# -*- coding: utf8 -*-
|
# -*- coding: utf8 -*-
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
import pickle
|
||||||
|
import StringIO
|
||||||
|
|
||||||
from spacy.strings import StringStore
|
from spacy.strings import StringStore
|
||||||
|
|
||||||
|
@ -76,3 +78,18 @@ def test_massive_strings(sstore):
|
||||||
s513 = '1' * 513
|
s513 = '1' * 513
|
||||||
orth = sstore[s513]
|
orth = sstore[s513]
|
||||||
assert sstore[orth] == s513
|
assert sstore[orth] == s513
|
||||||
|
|
||||||
|
|
||||||
|
def test_pickle_string_store(sstore):
    """A pickled and reloaded store must map an existing ID back to its string."""
    hello_id = sstore[u'Hi']
    # Round-trip in memory with dumps/loads. Pickle output is binary data, so
    # this avoids the Python 2-only StringIO text buffer and behaves the same
    # on Python 2 and Python 3.
    loaded = pickle.loads(pickle.dumps(sstore))
    assert loaded[hello_id] == u'Hi'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user