mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-29 06:57:49 +03:00 
			
		
		
		
	Merge pull request #6232 from adrianeboyd/feature/remove-span-recalculate
Remove Span._recalculate_indices
This commit is contained in:
		
						commit
						c23ce1ae71
					
				|  | @ -608,14 +608,11 @@ def test_doc_init_iob(): | |||
|         doc = Doc(Vocab(), words=words, ents=ents) | ||||
| 
 | ||||
| 
 | ||||
| @pytest.mark.xfail | ||||
| def test_doc_set_ents_spans(en_tokenizer): | ||||
| def test_doc_set_ents_invalid_spans(en_tokenizer): | ||||
|     doc = en_tokenizer("Some text about Colombia and the Czech Republic") | ||||
|     spans = [Span(doc, 3, 4, label="GPE"), Span(doc, 6, 8, label="GPE")] | ||||
|     with doc.retokenize() as retokenizer: | ||||
|         for span in spans: | ||||
|             retokenizer.merge(span) | ||||
|     # If this line is uncommented, it works: | ||||
|     # print(spans) | ||||
|     doc.ents = spans | ||||
|     assert [ent.text for ent in doc.ents] == ["Colombia", "Czech Republic"] | ||||
|     with pytest.raises(IndexError): | ||||
|         doc.ents = spans | ||||
|  |  | |||
|  | @ -336,6 +336,7 @@ def test_doc_retokenize_spans_sentence_update_after_merge(en_tokenizer): | |||
|         attrs = {"lemma": "none", "ent_type": "none"} | ||||
|         retokenizer.merge(doc[0:2], attrs=attrs) | ||||
|         retokenizer.merge(doc[-2:], attrs=attrs) | ||||
|     sent1, sent2 = list(doc.sents) | ||||
|     assert len(sent1) == init_len - 1 | ||||
|     assert len(sent2) == init_len2 - 1 | ||||
| 
 | ||||
|  |  | |||
|  | @ -16,5 +16,4 @@ cdef class Span: | |||
|     cdef public _vector | ||||
|     cdef public _vector_norm | ||||
| 
 | ||||
|     cpdef int _recalculate_indices(self) except -1 | ||||
|     cpdef np.ndarray to_array(self, object features) | ||||
|  |  | |||
|  | @ -150,7 +150,6 @@ cdef class Span: | |||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/span#len | ||||
|         """ | ||||
|         self._recalculate_indices() | ||||
|         if self.end < self.start: | ||||
|             return 0 | ||||
|         return self.end - self.start | ||||
|  | @ -167,7 +166,6 @@ cdef class Span: | |||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/span#getitem | ||||
|         """ | ||||
|         self._recalculate_indices() | ||||
|         if isinstance(i, slice): | ||||
|             start, end = normalize_slice(len(self), i.start, i.stop, i.step) | ||||
|             return Span(self.doc, start + self.start, end + self.start) | ||||
|  | @ -188,7 +186,6 @@ cdef class Span: | |||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/span#iter | ||||
|         """ | ||||
|         self._recalculate_indices() | ||||
|         for i in range(self.start, self.end): | ||||
|             yield self.doc[i] | ||||
| 
 | ||||
|  | @ -339,19 +336,6 @@ cdef class Span: | |||
|                 output[i-self.start, j] = get_token_attr(&self.doc.c[i], feature) | ||||
|         return output | ||||
| 
 | ||||
|     cpdef int _recalculate_indices(self) except -1: | ||||
|         if self.end > self.doc.length \ | ||||
|         or self.doc.c[self.start].idx != self.start_char \ | ||||
|         or (self.doc.c[self.end-1].idx + self.doc.c[self.end-1].lex.length) != self.end_char: | ||||
|             start = token_by_start(self.doc.c, self.doc.length, self.start_char) | ||||
|             if self.start == -1: | ||||
|                 raise IndexError(Errors.E036.format(start=self.start_char)) | ||||
|             end = token_by_end(self.doc.c, self.doc.length, self.end_char) | ||||
|             if end == -1: | ||||
|                 raise IndexError(Errors.E037.format(end=self.end_char)) | ||||
|             self.start = start | ||||
|             self.end = end + 1 | ||||
| 
 | ||||
|     @property | ||||
|     def vocab(self): | ||||
|         """RETURNS (Vocab): The Span's Doc's vocab.""" | ||||
|  | @ -520,7 +504,6 @@ cdef class Span: | |||
| 
 | ||||
|         DOCS: https://nightly.spacy.io/api/span#root | ||||
|         """ | ||||
|         self._recalculate_indices() | ||||
|         if "root" in self.doc.user_span_hooks: | ||||
|             return self.doc.user_span_hooks["root"](self) | ||||
|         # This should probably be called 'head', and the other one called | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user