diff --git a/spacy/language.py b/spacy/language.py index 739e7665d..cacce85c7 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -560,9 +560,9 @@ class Language(object): elif len(old_refs) == 0: self.vocab.strings._cleanup_stale_strings() nr_seen = 0 - # Last batch can be not garbage collected and we cannot know it — last - # doc still here. Not erase that strings — just extend with original - # content + # We can't know which strings from the last batch have really expired. + # So we don't erase the strings — we just extend with the original + # content. for string in original_strings_data: self.vocab.strings.add(string)