mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
* Fix zero-length spans
This commit is contained in:
parent
888c05a7fa
commit
4b3c96d76d
|
@@ -23,9 +23,12 @@ cdef class Span:
|
||||||
|
|
||||||
self.doc = tokens
|
self.doc = tokens
|
||||||
self.start = start
|
self.start = start
|
||||||
self.start_char = self.doc[start].idx
|
self.start_char = self.doc[start].idx if start < self.doc.length else 0
|
||||||
self.end = end
|
self.end = end
|
||||||
|
if end >= 1:
|
||||||
self.end_char = self.doc[end - 1].idx + len(self.doc[end - 1])
|
self.end_char = self.doc[end - 1].idx + len(self.doc[end - 1])
|
||||||
|
else:
|
||||||
|
self.end_char = 0
|
||||||
self.label = label
|
self.label = label
|
||||||
self._vector = vector
|
self._vector = vector
|
||||||
self._vector_norm = vector_norm
|
self._vector_norm = vector_norm
|
||||||
|
@@ -81,7 +84,7 @@ cdef class Span:
|
||||||
return numpy.dot(self.vector, other.vector) / (self.vector_norm * other.vector_norm)
|
return numpy.dot(self.vector, other.vector) / (self.vector_norm * other.vector_norm)
|
||||||
|
|
||||||
cpdef int _recalculate_indices(self) except -1:
|
cpdef int _recalculate_indices(self) except -1:
|
||||||
if self.end >= self.doc.length \
|
if self.end > self.doc.length \
|
||||||
or self.doc.c[self.start].idx != self.start_char \
|
or self.doc.c[self.start].idx != self.start_char \
|
||||||
or (self.doc.c[self.end-1].idx + self.doc.c[self.end-1].lex.length) != self.end_char:
|
or (self.doc.c[self.end-1].idx + self.doc.c[self.end-1].lex.length) != self.end_char:
|
||||||
start = token_by_start(self.doc.c, self.doc.length, self.start_char)
|
start = token_by_start(self.doc.c, self.doc.length, self.start_char)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user