mirror of
https://github.com/explosion/spaCy.git
synced 2025-09-23 12:36:46 +03:00
correct check on sent_start & raise if no boundaries
This commit is contained in:
parent
b902731313
commit
deab391cbf
|
@ -291,6 +291,7 @@ cdef class Span:
|
||||||
# if doc is parsed we can use the deps to find the sentence
|
# if doc is parsed we can use the deps to find the sentence
|
||||||
# otherwise we use the `sent_start` token attribute
|
# otherwise we use the `sent_start` token attribute
|
||||||
cdef int n = 0
|
cdef int n = 0
|
||||||
|
cdef int i
|
||||||
if self.doc.is_parsed:
|
if self.doc.is_parsed:
|
||||||
root = &self.doc.c[self.start]
|
root = &self.doc.c[self.start]
|
||||||
while root.head != 0:
|
while root.head != 0:
|
||||||
|
@ -300,19 +301,30 @@ cdef class Span:
|
||||||
raise RuntimeError
|
raise RuntimeError
|
||||||
return self.doc[root.l_edge:root.r_edge + 1]
|
return self.doc[root.l_edge:root.r_edge + 1]
|
||||||
else:
|
else:
|
||||||
|
# Check if the document has sentence boundaries,
|
||||||
|
# i.e at least one tok has the sent_start == 1
|
||||||
|
for i in range(self.doc.length):
|
||||||
|
if self.doc.c[i].sent_start == 1:
|
||||||
|
break
|
||||||
|
else:
|
||||||
|
raise ValueError(
|
||||||
|
"Access to sentence requires either the dependency parse "
|
||||||
|
"or sentence boundaries to be set by setting " +
|
||||||
|
"doc[i].is_sent_start = True")
|
||||||
# find start of the sentence
|
# find start of the sentence
|
||||||
start = self.start
|
start = self.start
|
||||||
while not self.doc.c[start].sent_start and start > 0:
|
while self.doc.c[start].sent_start != 1 and start > 0:
|
||||||
start += -1
|
start += -1
|
||||||
# find end of the sentence
|
# find end of the sentence
|
||||||
end = self.end
|
end = self.end
|
||||||
while not self.doc.c[end].sent_start:
|
while self.doc.c[end].sent_start != 1:
|
||||||
end += 1
|
end += 1
|
||||||
if n >= self.doc.length:
|
if n >= self.doc.length:
|
||||||
break
|
break
|
||||||
#
|
#
|
||||||
return self.doc[start:end]
|
return self.doc[start:end]
|
||||||
|
|
||||||
|
|
||||||
property has_vector:
|
property has_vector:
|
||||||
"""RETURNS (bool): Whether a word vector is associated with the object.
|
"""RETURNS (bool): Whether a word vector is associated with the object.
|
||||||
"""
|
"""
|
||||||
|
|
Loading…
Reference in New Issue
Block a user