mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-11 04:08:09 +03:00
Try to hold the original data instead of restoring it
This commit is contained in:
parent
91e2fa6561
commit
a33d5a068d
|
@ -559,14 +559,10 @@ class Language(object):
|
||||||
nr_seen += 1
|
nr_seen += 1
|
||||||
elif len(old_refs) == 0:
|
elif len(old_refs) == 0:
|
||||||
old_refs, recent_refs = recent_refs, old_refs
|
old_refs, recent_refs = recent_refs, old_refs
|
||||||
keys, strings = self.vocab.strings._cleanup_stale_strings()
|
keys, strings = self.vocab.strings._cleanup_stale_strings(original_strings_data)
|
||||||
self.vocab._reset_cache(keys, strings)
|
self.vocab._reset_cache(keys, strings)
|
||||||
self.tokenizer._reset_cache(keys)
|
self.tokenizer._reset_cache(keys)
|
||||||
for string in original_strings_data:
|
|
||||||
self.vocab.strings.add(string)
|
|
||||||
nr_seen = 0
|
nr_seen = 0
|
||||||
# We can't know which strings from the last batch have really expired.
|
|
||||||
# So we don't erase the strings.
|
|
||||||
|
|
||||||
def to_disk(self, path, disable=tuple()):
|
def to_disk(self, path, disable=tuple()):
|
||||||
"""Save the current state to a directory. If a model is loaded, this
|
"""Save the current state to a directory. If a model is loaded, this
|
||||||
|
|
|
@ -249,7 +249,7 @@ cdef class StringStore:
|
||||||
for string in strings:
|
for string in strings:
|
||||||
self.add(string)
|
self.add(string)
|
||||||
|
|
||||||
def _cleanup_stale_strings(self):
|
def _cleanup_stale_strings(self, excepted):
|
||||||
"""
|
"""
|
||||||
RETURNS (keys, strings): Dropped strings and keys that can be dropped from other places
|
RETURNS (keys, strings): Dropped strings and keys that can be dropped from other places
|
||||||
"""
|
"""
|
||||||
|
@ -262,11 +262,12 @@ cdef class StringStore:
|
||||||
dropped_keys = []
|
dropped_keys = []
|
||||||
for i in range(self.keys.size()):
|
for i in range(self.keys.size()):
|
||||||
key = self.keys[i]
|
key = self.keys[i]
|
||||||
if self.hits.count(key) != 0:
|
value = self[key]
|
||||||
|
if self.hits.count(key) != 0 or value in excepted:
|
||||||
tmp.push_back(key)
|
tmp.push_back(key)
|
||||||
else:
|
else:
|
||||||
dropped_keys.append(key)
|
dropped_keys.append(key)
|
||||||
dropped_strings.append(self[key])
|
dropped_strings.append(value)
|
||||||
|
|
||||||
self.keys.swap(tmp)
|
self.keys.swap(tmp)
|
||||||
strings = list(self)
|
strings = list(self)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user