mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-24 17:06:29 +03:00
Fix/span.sent (#6083)
* add fail test
* fix test
* fix span.sent
* Remove incorrect implicit check

Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>
This commit is contained in:
parent
4cbb954281
commit
3243ddac8f
|
@ -174,19 +174,25 @@ def test_spans_by_character(doc):
|
|||
assert span1.end_char == span2.end_char
|
||||
assert span2.label_ == "GPE"
|
||||
|
||||
span2 = doc.char_span(span1.start_char, span1.end_char, label="GPE", alignment_mode="strict")
|
||||
span2 = doc.char_span(
|
||||
span1.start_char, span1.end_char, label="GPE", alignment_mode="strict"
|
||||
)
|
||||
assert span1.start_char == span2.start_char
|
||||
assert span1.end_char == span2.end_char
|
||||
assert span2.label_ == "GPE"
|
||||
|
||||
# alignment mode "contract"
|
||||
span2 = doc.char_span(span1.start_char - 3, span1.end_char, label="GPE", alignment_mode="contract")
|
||||
span2 = doc.char_span(
|
||||
span1.start_char - 3, span1.end_char, label="GPE", alignment_mode="contract"
|
||||
)
|
||||
assert span1.start_char == span2.start_char
|
||||
assert span1.end_char == span2.end_char
|
||||
assert span2.label_ == "GPE"
|
||||
|
||||
# alignment mode "expand"
|
||||
span2 = doc.char_span(span1.start_char + 1, span1.end_char, label="GPE", alignment_mode="expand")
|
||||
span2 = doc.char_span(
|
||||
span1.start_char + 1, span1.end_char, label="GPE", alignment_mode="expand"
|
||||
)
|
||||
assert span1.start_char == span2.start_char
|
||||
assert span1.end_char == span2.end_char
|
||||
assert span2.label_ == "GPE"
|
||||
|
@ -318,3 +324,11 @@ def test_span_boundaries(doc):
|
|||
_ = span[-5]
|
||||
with pytest.raises(IndexError):
|
||||
_ = span[5]
|
||||
|
||||
|
||||
def test_sent(en_tokenizer):
    """Check that accessing `span.sent` raises ValueError on an unsentenced doc."""
    doc = en_tokenizer("Check span.sent raises error if doc is not sentencized.")
    span = doc[1:3]
    # Precondition: the test is only meaningful if the doc reports no
    # sentence boundaries.
    assert not span.doc.is_sentenced
    with pytest.raises(ValueError):
        # Bind the result, matching the `_ = span[5]` form used in
        # test_span_boundaries, so the property access is not a bare
        # expression statement.
        _ = span.sent
|
||||
|
|
|
@ -391,8 +391,6 @@ cdef class Span:
|
|||
"""RETURNS (Span): The sentence span that the span is a part of."""
|
||||
if "sent" in self.doc.user_span_hooks:
|
||||
return self.doc.user_span_hooks["sent"](self)
|
||||
# This should raise if not parsed / no custom sentence boundaries
|
||||
self.doc.sents
|
||||
# Use `sent_start` token attribute to find sentence boundaries
|
||||
cdef int n = 0
|
||||
if self.doc.is_sentenced:
|
||||
|
@ -402,13 +400,14 @@ cdef class Span:
|
|||
start += -1
|
||||
# Find end of the sentence
|
||||
end = self.end
|
||||
n = 0
|
||||
while end < self.doc.length and self.doc.c[end].sent_start != 1:
|
||||
end += 1
|
||||
n += 1
|
||||
if n >= self.doc.length:
|
||||
break
|
||||
return self.doc[start:end]
|
||||
else:
|
||||
raise ValueError(Errors.E030)
|
||||
|
||||
@property
|
||||
def ents(self):
|
||||
|
|
Loading…
Reference in New Issue
Block a user