StringStore is now actually cleaned up

Do not lose docs in ref tracking
This commit is contained in:
Roman Domrachev 2017-11-14 17:45:50 +03:00
parent 378280039b
commit a2745b0e84
3 changed files with 23 additions and 3 deletions

View File

@ -558,6 +558,7 @@ class Language(object):
old_refs.add(doc)
nr_seen += 1
elif len(old_refs) == 0:
old_refs, recent_refs = recent_refs, old_refs
self.vocab.strings._cleanup_stale_strings()
nr_seen = 0
# The last batch may not have been garbage collected, and we cannot know it — last

View File

@ -260,6 +260,9 @@ cdef class StringStore:
if self.hits.count(key) != 0:
tmp.push_back(key)
strings = list(self)
self._reset_and_load(strings)
self.keys.swap(tmp)
self.hits.clear()

View File

@ -1,6 +1,8 @@
# coding: utf8
from __future__ import unicode_literals
import gc
from ...lang.en import English
@ -11,12 +13,26 @@ def test_issue1506():
for _ in range(10001):
yield "It's sentence produced by that bug."
yield "Oh snap."
for _ in range(10001):
yield "I erase lemmas."
for _ in range(10001):
yield "It's sentence produced by that bug."
for d in nlp.pipe(string_generator()):
for t in d:
str(t.lemma_)
for _ in range(10001):
yield "It's sentence produced by that bug."
anchor = None
remember = None
for i, d in enumerate(nlp.pipe(string_generator())):
if i == 9999:
anchor = d
elif 10001 == i:
remember = d
elif i == 10002:
del anchor
gc.collect()
assert remember.text == 'Oh snap.'