Fix test imports and last batch cleanup

This commit is contained in:
Roman Domrachev 2017-11-11 11:31:59 +03:00
parent 4a6b094e09
commit ee60a52ee7
2 changed files with 11 additions and 13 deletions

View File

@@ -560,7 +560,11 @@ class Language(object):
elif len(old_refs) == 0: elif len(old_refs) == 0:
self.vocab.strings._cleanup_stale_strings() self.vocab.strings._cleanup_stale_strings()
nr_seen = 0 nr_seen = 0
self.vocab.strings._reset_and_load(original_strings_data) # The last batch may not have been garbage collected yet, and we cannot
# know whether it has — the last doc is still here. Do not erase those
# strings — just extend the store with the original content.
for string in original_strings_data:
self.vocab.strings.add(string)
def to_disk(self, path, disable=tuple()): def to_disk(self, path, disable=tuple()):
"""Save the current state to a directory. If a model is loaded, this """Save the current state to a directory. If a model is loaded, this

View File

@@ -1,12 +1,6 @@
# coding: utf8 # coding: utf8
from __future__ import unicode_literals from __future__ import unicode_literals
import random
import string
import itertools
from compat import izip
from ...lang.en import English from ...lang.en import English
@@ -14,14 +8,14 @@ def test_issue1506():
nlp = English() nlp = English()
def string_generator(): def string_generator():
for (_, t) in izip(range(10001), itertools.repeat("It's sentence produced by that bug.")): for _ in range(10001):
yield t yield "It's sentence produced by that bug."
for (_, t) in izip(range(10001), itertools.repeat("I erase lemmas.")): for _ in range(10001):
yield t yield "I erase lemmas."
for (_, t) in izip(range(10001), itertools.repeat("It's sentence produced by that bug.")): for _ in range(10001):
yield t yield "It's sentence produced by that bug."
for d in nlp.pipe(string_generator()): for d in nlp.pipe(string_generator()):
for t in d: for t in d: