mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 01:16:28 +03:00
Use safer method to get string without hit
This commit is contained in:
parent
a33d5a068d
commit
3e21680814
|
@ -251,6 +251,7 @@ cdef class StringStore:
|
|||
|
||||
def _cleanup_stale_strings(self, excepted):
|
||||
"""
|
||||
excepted (list): Strings that should not be removed.
|
||||
RETURNS (keys, strings): Dropped strings and keys that can be dropped from other places
|
||||
"""
|
||||
if self.hits.size() == 0:
|
||||
|
@ -262,7 +263,9 @@ cdef class StringStore:
|
|||
dropped_keys = []
|
||||
for i in range(self.keys.size()):
|
||||
key = self.keys[i]
|
||||
value = self[key]
|
||||
# Here we cannot use __getitem__ because it also set hit.
|
||||
utf8str = <Utf8Str*>self._map.get(key)
|
||||
value = decode_Utf8Str(utf8str)
|
||||
if self.hits.count(key) != 0 or value in excepted:
|
||||
tmp.push_back(key)
|
||||
else:
|
||||
|
|
|
@ -13,6 +13,9 @@ def test_issue1506():
|
|||
for _ in range(10001):
|
||||
yield "It's sentence produced by that bug."
|
||||
|
||||
for _ in range(10001):
|
||||
yield "I erase some hbdsaj lemmas."
|
||||
|
||||
for _ in range(10001):
|
||||
yield "I erase lemmas."
|
||||
|
||||
|
|
|
@ -469,7 +469,8 @@ cdef class Vocab:
|
|||
for k in keys:
|
||||
del self._by_hash[k]
|
||||
|
||||
self._by_orth = PreshMap()
|
||||
if len(strings) != 0:
|
||||
self._by_orth = PreshMap()
|
||||
|
||||
|
||||
def pickle_vocab(vocab):
|
||||
|
|
Loading…
Reference in New Issue
Block a user