Mirror of https://github.com/explosion/spaCy.git (synced 2025-01-12 10:16:27 +03:00)
Remove Span._recalculate_indices
Remove `Span._recalculate_indices`, which is a remnant from the deprecated `Span.merge`.
This commit is contained in:
Parent: 4771a10503
Commit: 727370c633
|
@ -608,14 +608,11 @@ def test_doc_init_iob():
|
||||||
doc = Doc(Vocab(), words=words, ents=ents)
|
doc = Doc(Vocab(), words=words, ents=ents)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.xfail
|
def test_doc_set_ents_invalid_spans(en_tokenizer):
|
||||||
def test_doc_set_ents_spans(en_tokenizer):
|
|
||||||
doc = en_tokenizer("Some text about Colombia and the Czech Republic")
|
doc = en_tokenizer("Some text about Colombia and the Czech Republic")
|
||||||
spans = [Span(doc, 3, 4, label="GPE"), Span(doc, 6, 8, label="GPE")]
|
spans = [Span(doc, 3, 4, label="GPE"), Span(doc, 6, 8, label="GPE")]
|
||||||
with doc.retokenize() as retokenizer:
|
with doc.retokenize() as retokenizer:
|
||||||
for span in spans:
|
for span in spans:
|
||||||
retokenizer.merge(span)
|
retokenizer.merge(span)
|
||||||
# If this line is uncommented, it works:
|
with pytest.raises(IndexError):
|
||||||
# print(spans)
|
|
||||||
doc.ents = spans
|
doc.ents = spans
|
||||||
assert [ent.text for ent in doc.ents] == ["Colombia", "Czech Republic"]
|
|
||||||
|
|
|
@ -336,6 +336,7 @@ def test_doc_retokenize_spans_sentence_update_after_merge(en_tokenizer):
|
||||||
attrs = {"lemma": "none", "ent_type": "none"}
|
attrs = {"lemma": "none", "ent_type": "none"}
|
||||||
retokenizer.merge(doc[0:2], attrs=attrs)
|
retokenizer.merge(doc[0:2], attrs=attrs)
|
||||||
retokenizer.merge(doc[-2:], attrs=attrs)
|
retokenizer.merge(doc[-2:], attrs=attrs)
|
||||||
|
sent1, sent2 = list(doc.sents)
|
||||||
assert len(sent1) == init_len - 1
|
assert len(sent1) == init_len - 1
|
||||||
assert len(sent2) == init_len2 - 1
|
assert len(sent2) == init_len2 - 1
|
||||||
|
|
||||||
|
|
|
@ -16,5 +16,4 @@ cdef class Span:
|
||||||
cdef public _vector
|
cdef public _vector
|
||||||
cdef public _vector_norm
|
cdef public _vector_norm
|
||||||
|
|
||||||
cpdef int _recalculate_indices(self) except -1
|
|
||||||
cpdef np.ndarray to_array(self, object features)
|
cpdef np.ndarray to_array(self, object features)
|
||||||
|
|
|
@ -150,7 +150,6 @@ cdef class Span:
|
||||||
|
|
||||||
DOCS: https://nightly.spacy.io/api/span#len
|
DOCS: https://nightly.spacy.io/api/span#len
|
||||||
"""
|
"""
|
||||||
self._recalculate_indices()
|
|
||||||
if self.end < self.start:
|
if self.end < self.start:
|
||||||
return 0
|
return 0
|
||||||
return self.end - self.start
|
return self.end - self.start
|
||||||
|
@ -167,7 +166,6 @@ cdef class Span:
|
||||||
|
|
||||||
DOCS: https://nightly.spacy.io/api/span#getitem
|
DOCS: https://nightly.spacy.io/api/span#getitem
|
||||||
"""
|
"""
|
||||||
self._recalculate_indices()
|
|
||||||
if isinstance(i, slice):
|
if isinstance(i, slice):
|
||||||
start, end = normalize_slice(len(self), i.start, i.stop, i.step)
|
start, end = normalize_slice(len(self), i.start, i.stop, i.step)
|
||||||
return Span(self.doc, start + self.start, end + self.start)
|
return Span(self.doc, start + self.start, end + self.start)
|
||||||
|
@ -188,7 +186,6 @@ cdef class Span:
|
||||||
|
|
||||||
DOCS: https://nightly.spacy.io/api/span#iter
|
DOCS: https://nightly.spacy.io/api/span#iter
|
||||||
"""
|
"""
|
||||||
self._recalculate_indices()
|
|
||||||
for i in range(self.start, self.end):
|
for i in range(self.start, self.end):
|
||||||
yield self.doc[i]
|
yield self.doc[i]
|
||||||
|
|
||||||
|
@ -339,19 +336,6 @@ cdef class Span:
|
||||||
output[i-self.start, j] = get_token_attr(&self.doc.c[i], feature)
|
output[i-self.start, j] = get_token_attr(&self.doc.c[i], feature)
|
||||||
return output
|
return output
|
||||||
|
|
||||||
cpdef int _recalculate_indices(self) except -1:
|
|
||||||
if self.end > self.doc.length \
|
|
||||||
or self.doc.c[self.start].idx != self.start_char \
|
|
||||||
or (self.doc.c[self.end-1].idx + self.doc.c[self.end-1].lex.length) != self.end_char:
|
|
||||||
start = token_by_start(self.doc.c, self.doc.length, self.start_char)
|
|
||||||
if self.start == -1:
|
|
||||||
raise IndexError(Errors.E036.format(start=self.start_char))
|
|
||||||
end = token_by_end(self.doc.c, self.doc.length, self.end_char)
|
|
||||||
if end == -1:
|
|
||||||
raise IndexError(Errors.E037.format(end=self.end_char))
|
|
||||||
self.start = start
|
|
||||||
self.end = end + 1
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def vocab(self):
|
def vocab(self):
|
||||||
"""RETURNS (Vocab): The Span's Doc's vocab."""
|
"""RETURNS (Vocab): The Span's Doc's vocab."""
|
||||||
|
@ -520,7 +504,6 @@ cdef class Span:
|
||||||
|
|
||||||
DOCS: https://nightly.spacy.io/api/span#root
|
DOCS: https://nightly.spacy.io/api/span#root
|
||||||
"""
|
"""
|
||||||
self._recalculate_indices()
|
|
||||||
if "root" in self.doc.user_span_hooks:
|
if "root" in self.doc.user_span_hooks:
|
||||||
return self.doc.user_span_hooks["root"](self)
|
return self.doc.user_span_hooks["root"](self)
|
||||||
# This should probably be called 'head', and the other one called
|
# This should probably be called 'head', and the other one called
|
||||||
|
|
Loading…
Reference in New Issue
Block a user