mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-30 23:47:31 +03:00 
			
		
		
		
	Remove caching of Token in Doc, as it caused a reference cycle.
This commit is contained in:
		
							parent
							
								
									3e037054c8
								
							
						
					
					
						commit
						a002264fec
					
				|  | @ -140,7 +140,6 @@ cdef class Doc: | ||||||
|         self.user_span_hooks = {} |         self.user_span_hooks = {} | ||||||
|         self.tensor = numpy.zeros((0,), dtype='float32') |         self.tensor = numpy.zeros((0,), dtype='float32') | ||||||
|         self.user_data = {} |         self.user_data = {} | ||||||
|         self._py_tokens = [] |  | ||||||
|         self._vector = None |         self._vector = None | ||||||
|         self.noun_chunks_iterator = _get_chunker(self.vocab.lang) |         self.noun_chunks_iterator = _get_chunker(self.vocab.lang) | ||||||
|         cdef unicode orth |         cdef unicode orth | ||||||
|  | @ -209,10 +208,7 @@ cdef class Doc: | ||||||
|         if i < 0: |         if i < 0: | ||||||
|             i = self.length + i |             i = self.length + i | ||||||
|         bounds_check(i, self.length, PADDING) |         bounds_check(i, self.length, PADDING) | ||||||
|         if self._py_tokens[i] is not None: |         return Token.cinit(self.vocab, &self.c[i], i, self) | ||||||
|             return self._py_tokens[i] |  | ||||||
|         else: |  | ||||||
|             return Token.cinit(self.vocab, &self.c[i], i, self) |  | ||||||
| 
 | 
 | ||||||
|     def __iter__(self): |     def __iter__(self): | ||||||
|         """Iterate over `Token`  objects, from which the annotations can be |         """Iterate over `Token`  objects, from which the annotations can be | ||||||
|  | @ -226,10 +222,7 @@ cdef class Doc: | ||||||
|         """ |         """ | ||||||
|         cdef int i |         cdef int i | ||||||
|         for i in range(self.length): |         for i in range(self.length): | ||||||
|             if self._py_tokens[i] is not None: |             yield Token.cinit(self.vocab, &self.c[i], i, self) | ||||||
|                 yield self._py_tokens[i] |  | ||||||
|             else: |  | ||||||
|                 yield Token.cinit(self.vocab, &self.c[i], i, self) |  | ||||||
| 
 | 
 | ||||||
|     def __len__(self): |     def __len__(self): | ||||||
|         """The number of tokens in the document. |         """The number of tokens in the document. | ||||||
|  | @ -535,7 +528,6 @@ cdef class Doc: | ||||||
|         self.length += 1 |         self.length += 1 | ||||||
|         # Set morphological attributes, e.g. by lemma, if possible |         # Set morphological attributes, e.g. by lemma, if possible | ||||||
|         self.vocab.morphology.assign_untagged(t) |         self.vocab.morphology.assign_untagged(t) | ||||||
|         self._py_tokens.append(None) |  | ||||||
|         return t.idx + t.lex.length + t.spacy |         return t.idx + t.lex.length + t.spacy | ||||||
| 
 | 
 | ||||||
|     @cython.boundscheck(False) |     @cython.boundscheck(False) | ||||||
|  | @ -841,7 +833,6 @@ cdef class Doc: | ||||||
|         # Set the left/right children, left/right edges |         # Set the left/right children, left/right edges | ||||||
|         set_children_from_heads(self.c, self.length) |         set_children_from_heads(self.c, self.length) | ||||||
|         # Clear the cached Python objects |         # Clear the cached Python objects | ||||||
|         self._py_tokens = [None] * self.length |  | ||||||
|         # Return the merged Python object |         # Return the merged Python object | ||||||
|         return self[start] |         return self[start] | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -19,10 +19,7 @@ cdef class Token: | ||||||
|         if offset < 0 or offset >= doc.length: |         if offset < 0 or offset >= doc.length: | ||||||
|             msg = "Attempt to access token at %d, max length %d" |             msg = "Attempt to access token at %d, max length %d" | ||||||
|             raise IndexError(msg % (offset, doc.length)) |             raise IndexError(msg % (offset, doc.length)) | ||||||
|         if doc._py_tokens[offset] != None: |  | ||||||
|             return doc._py_tokens[offset] |  | ||||||
|         cdef Token self = Token.__new__(Token, vocab, doc, offset) |         cdef Token self = Token.__new__(Token, vocab, doc, offset) | ||||||
|         doc._py_tokens[offset] = self |  | ||||||
|         return self |         return self | ||||||
| 
 | 
 | ||||||
|     #cdef inline TokenC struct_from_attrs(Vocab vocab, attrs): |     #cdef inline TokenC struct_from_attrs(Vocab vocab, attrs): | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user