Allow span label to be string in Doc.char_span

This commit is contained in:
Matthew Honnibal 2017-08-19 16:18:09 +02:00
parent 97aabafb5f
commit 8b7ac77c23

View File

@ -238,16 +238,18 @@ cdef class Doc:
def doc(self):
    """Return this object itself.

    RETURNS (Doc): The same object the accessor was called on (`self`).
    """
    return self
def char_span(self, int start_idx, int end_idx, attr_t label=0, vector=None):
def char_span(self, int start_idx, int end_idx, label=0, vector=None):
"""Create a `Span` object from the slice `doc.text[start : end]`.
doc (Doc): The parent document.
start (int): The index of the first character of the span.
end (int): The index of the first character after the span.
label (uint64): A label to attach to the Span, e.g. for named entities.
label (uint64 or string): A label to attach to the Span, e.g. for named entities.
vector (ndarray[ndim=1, dtype='float32']): A meaning representation of the span.
RETURNS (Span): The newly constructed object.
"""
if not isinstance(label, int):
label = self.vocab.strings.add(label)
cdef int start = token_by_start(self.c, self.length, start_idx)
if start == -1:
return None