mirror of
https://github.com/explosion/spaCy.git
synced 2025-02-03 13:14:11 +03:00
StringStore now actually cleaned
Do not lose docs in ref tracking
This commit is contained in:
parent
378280039b
commit
a2745b0e84
|
@ -558,6 +558,7 @@ class Language(object):
|
|||
old_refs.add(doc)
|
||||
nr_seen += 1
|
||||
elif len(old_refs) == 0:
|
||||
old_refs, recent_refs = recent_refs, old_refs
|
||||
self.vocab.strings._cleanup_stale_strings()
|
||||
nr_seen = 0
|
||||
# The last batch may not have been garbage collected yet, and we cannot know that — last
|
||||
|
|
|
@ -260,6 +260,9 @@ cdef class StringStore:
|
|||
if self.hits.count(key) != 0:
|
||||
tmp.push_back(key)
|
||||
|
||||
strings = list(self)
|
||||
self._reset_and_load(strings)
|
||||
|
||||
self.keys.swap(tmp)
|
||||
self.hits.clear()
|
||||
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
# coding: utf8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import gc
|
||||
|
||||
from ...lang.en import English
|
||||
|
||||
|
||||
|
@ -11,12 +13,26 @@ def test_issue1506():
|
|||
for _ in range(10001):
|
||||
yield "It's sentence produced by that bug."
|
||||
|
||||
yield "Oh snap."
|
||||
|
||||
for _ in range(10001):
|
||||
yield "I erase lemmas."
|
||||
|
||||
for _ in range(10001):
|
||||
yield "It's sentence produced by that bug."
|
||||
|
||||
for d in nlp.pipe(string_generator()):
|
||||
for t in d:
|
||||
str(t.lemma_)
|
||||
for _ in range(10001):
|
||||
yield "It's sentence produced by that bug."
|
||||
|
||||
anchor = None
|
||||
remember = None
|
||||
for i, d in enumerate(nlp.pipe(string_generator())):
|
||||
if i == 9999:
|
||||
anchor = d
|
||||
elif 10001 == i:
|
||||
remember = d
|
||||
elif i == 10002:
|
||||
del anchor
|
||||
gc.collect()
|
||||
|
||||
assert remember.text == 'Oh snap.'
|
||||
|
|
Loading…
Reference in New Issue
Block a user