mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
Use safer method to get string without hit
This commit is contained in:
parent
a33d5a068d
commit
3e21680814
|
@ -251,6 +251,7 @@ cdef class StringStore:
|
||||||
|
|
||||||
def _cleanup_stale_strings(self, excepted):
|
def _cleanup_stale_strings(self, excepted):
|
||||||
"""
|
"""
|
||||||
|
excepted (list): Strings that should not be removed.
|
||||||
RETURNS (keys, strings): Dropped strings and keys that can be dropped from other places
|
RETURNS (keys, strings): Dropped strings and keys that can be dropped from other places
|
||||||
"""
|
"""
|
||||||
if self.hits.size() == 0:
|
if self.hits.size() == 0:
|
||||||
|
@ -262,7 +263,9 @@ cdef class StringStore:
|
||||||
dropped_keys = []
|
dropped_keys = []
|
||||||
for i in range(self.keys.size()):
|
for i in range(self.keys.size()):
|
||||||
key = self.keys[i]
|
key = self.keys[i]
|
||||||
value = self[key]
|
# We cannot use __getitem__ here because it also records a hit for the key, which would make the hits check below keep every string.
|
||||||
|
utf8str = <Utf8Str*>self._map.get(key)
|
||||||
|
value = decode_Utf8Str(utf8str)
|
||||||
if self.hits.count(key) != 0 or value in excepted:
|
if self.hits.count(key) != 0 or value in excepted:
|
||||||
tmp.push_back(key)
|
tmp.push_back(key)
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -13,6 +13,9 @@ def test_issue1506():
|
||||||
for _ in range(10001):
|
for _ in range(10001):
|
||||||
yield "It's sentence produced by that bug."
|
yield "It's sentence produced by that bug."
|
||||||
|
|
||||||
|
for _ in range(10001):
|
||||||
|
yield "I erase some hbdsaj lemmas."
|
||||||
|
|
||||||
for _ in range(10001):
|
for _ in range(10001):
|
||||||
yield "I erase lemmas."
|
yield "I erase lemmas."
|
||||||
|
|
||||||
|
|
|
@ -469,7 +469,8 @@ cdef class Vocab:
|
||||||
for k in keys:
|
for k in keys:
|
||||||
del self._by_hash[k]
|
del self._by_hash[k]
|
||||||
|
|
||||||
self._by_orth = PreshMap()
|
if len(strings) != 0:
|
||||||
|
self._by_orth = PreshMap()
|
||||||
|
|
||||||
|
|
||||||
def pickle_vocab(vocab):
|
def pickle_vocab(vocab):
|
||||||
|
|
Loading…
Reference in New Issue
Block a user