mirror of
https://github.com/explosion/spaCy.git
synced 2025-02-09 16:10:33 +03:00
StringStore now actually cleaned
Do not lose docs in ref tracking
This commit is contained in:
parent
378280039b
commit
a2745b0e84
|
@ -558,6 +558,7 @@ class Language(object):
|
||||||
old_refs.add(doc)
|
old_refs.add(doc)
|
||||||
nr_seen += 1
|
nr_seen += 1
|
||||||
elif len(old_refs) == 0:
|
elif len(old_refs) == 0:
|
||||||
|
old_refs, recent_refs = recent_refs, old_refs
|
||||||
self.vocab.strings._cleanup_stale_strings()
|
self.vocab.strings._cleanup_stale_strings()
|
||||||
nr_seen = 0
|
nr_seen = 0
|
||||||
# Last batch can be not garbage collected and we cannot know it — last
|
# Last batch can be not garbage collected and we cannot know it — last
|
||||||
|
|
|
@ -260,6 +260,9 @@ cdef class StringStore:
|
||||||
if self.hits.count(key) != 0:
|
if self.hits.count(key) != 0:
|
||||||
tmp.push_back(key)
|
tmp.push_back(key)
|
||||||
|
|
||||||
|
strings = list(self)
|
||||||
|
self._reset_and_load(strings)
|
||||||
|
|
||||||
self.keys.swap(tmp)
|
self.keys.swap(tmp)
|
||||||
self.hits.clear()
|
self.hits.clear()
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
# coding: utf8
|
# coding: utf8
|
||||||
from __future__ import unicode_literals
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import gc
|
||||||
|
|
||||||
from ...lang.en import English
|
from ...lang.en import English
|
||||||
|
|
||||||
|
|
||||||
|
@ -11,12 +13,26 @@ def test_issue1506():
|
||||||
for _ in range(10001):
|
for _ in range(10001):
|
||||||
yield "It's sentence produced by that bug."
|
yield "It's sentence produced by that bug."
|
||||||
|
|
||||||
|
yield "Oh snap."
|
||||||
|
|
||||||
for _ in range(10001):
|
for _ in range(10001):
|
||||||
yield "I erase lemmas."
|
yield "I erase lemmas."
|
||||||
|
|
||||||
for _ in range(10001):
|
for _ in range(10001):
|
||||||
yield "It's sentence produced by that bug."
|
yield "It's sentence produced by that bug."
|
||||||
|
|
||||||
for d in nlp.pipe(string_generator()):
|
for _ in range(10001):
|
||||||
for t in d:
|
yield "It's sentence produced by that bug."
|
||||||
str(t.lemma_)
|
|
||||||
|
anchor = None
|
||||||
|
remember = None
|
||||||
|
for i, d in enumerate(nlp.pipe(string_generator())):
|
||||||
|
if i == 9999:
|
||||||
|
anchor = d
|
||||||
|
elif 10001 == i:
|
||||||
|
remember = d
|
||||||
|
elif i == 10002:
|
||||||
|
del anchor
|
||||||
|
gc.collect()
|
||||||
|
|
||||||
|
assert remember.text == 'Oh snap.'
|
||||||
|
|
Loading…
Reference in New Issue
Block a user