Use a safer method to get a string without recording a hit

This commit is contained in:
Roman Domrachev 2017-11-14 22:58:46 +03:00
parent a33d5a068d
commit 3e21680814
3 changed files with 9 additions and 2 deletions

View File

@ -251,6 +251,7 @@ cdef class StringStore:
def _cleanup_stale_strings(self, excepted):
"""
excepted (list): Strings that should not be removed.
RETURNS (keys, strings): Dropped strings and keys that can be dropped from other places
"""
if self.hits.size() == 0:
@ -262,7 +263,9 @@ cdef class StringStore:
dropped_keys = []
for i in range(self.keys.size()):
key = self.keys[i]
value = self[key]
# Here we cannot use __getitem__ because it also sets a hit.
utf8str = <Utf8Str*>self._map.get(key)
value = decode_Utf8Str(utf8str)
if self.hits.count(key) != 0 or value in excepted:
tmp.push_back(key)
else:

View File

@ -13,6 +13,9 @@ def test_issue1506():
for _ in range(10001):
yield "It's sentence produced by that bug."
for _ in range(10001):
yield "I erase some hbdsaj lemmas."
for _ in range(10001):
yield "I erase lemmas."

View File

@ -469,7 +469,8 @@ cdef class Vocab:
for k in keys:
del self._by_hash[k]
self._by_orth = PreshMap()
if len(strings) != 0:
self._by_orth = PreshMap()
def pickle_vocab(vocab):