mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 07:57:35 +03:00 
			
		
		
		
	StringStore is now actually cleaned up
Do not lose docs in ref tracking
This commit is contained in:
		
							parent
							
								
									378280039b
								
							
						
					
					
						commit
						a2745b0e84
					
				|  | @ -558,6 +558,7 @@ class Language(object): | |||
|                 old_refs.add(doc) | ||||
|                 nr_seen += 1 | ||||
|             elif len(old_refs) == 0: | ||||
|                 old_refs, recent_refs = recent_refs, old_refs | ||||
|                 self.vocab.strings._cleanup_stale_strings() | ||||
|                 nr_seen = 0 | ||||
|         # Last batch can be not garbage collected and we cannot know it — last | ||||
|  |  | |||
|  | @ -260,6 +260,9 @@ cdef class StringStore: | |||
|             if self.hits.count(key) != 0: | ||||
|                 tmp.push_back(key) | ||||
| 
 | ||||
|         strings = list(self) | ||||
|         self._reset_and_load(strings) | ||||
| 
 | ||||
|         self.keys.swap(tmp) | ||||
|         self.hits.clear() | ||||
| 
 | ||||
|  |  | |||
|  | @ -1,6 +1,8 @@ | |||
| # coding: utf8 | ||||
| from __future__ import unicode_literals | ||||
| 
 | ||||
| import gc | ||||
| 
 | ||||
| from ...lang.en import English | ||||
| 
 | ||||
| 
 | ||||
|  | @ -11,12 +13,26 @@ def test_issue1506(): | |||
|         for _ in range(10001): | ||||
|             yield "It's sentence produced by that bug." | ||||
| 
 | ||||
|         yield "Oh snap." | ||||
| 
 | ||||
|         for _ in range(10001): | ||||
|             yield "I erase lemmas." | ||||
| 
 | ||||
|         for _ in range(10001): | ||||
|             yield "It's sentence produced by that bug." | ||||
| 
 | ||||
|     for d in nlp.pipe(string_generator()): | ||||
|         for t in d: | ||||
|             str(t.lemma_) | ||||
|         for _ in range(10001): | ||||
|             yield "It's sentence produced by that bug." | ||||
| 
 | ||||
|     anchor = None | ||||
|     remember = None | ||||
|     for i, d in enumerate(nlp.pipe(string_generator())): | ||||
|         if i == 9999: | ||||
|             anchor = d | ||||
|         elif 10001 == i: | ||||
|             remember = d | ||||
|         elif i == 10002: | ||||
|             del anchor | ||||
|             gc.collect() | ||||
| 
 | ||||
|     assert remember.text == 'Oh snap.' | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user