mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-11 04:08:09 +03:00
Add Doc.char_span method, to get a span by character offset
This commit is contained in:
parent
482bba1722
commit
80236116a6
|
@ -238,6 +238,27 @@ cdef class Doc:
|
|||
def doc(self):
|
||||
return self
|
||||
|
||||
def char_span(self, int start_idx, int end_idx, attr_t label=0, vector=None):
|
||||
"""Create a `Span` object from the slice `doc.text[start : end]`.
|
||||
|
||||
doc (Doc): The parent document.
|
||||
start (int): The index of the first character of the span.
|
||||
end (int): The index of the first character after the span.
|
||||
label (uint64): A label to attach to the Span, e.g. for named entities.
|
||||
vector (ndarray[ndim=1, dtype='float32']): A meaning representation of the span.
|
||||
RETURNS (Span): The newly constructed object.
|
||||
"""
|
||||
cdef int start = token_by_start(self.c, self.length, start_idx)
|
||||
if start == -1:
|
||||
return None
|
||||
cdef int end = token_by_end(self.c, self.length, end_idx)
|
||||
if end == -1:
|
||||
return None
|
||||
# Currently we have the token index, we want the range-end index
|
||||
end += 1
|
||||
cdef Span span = Span(self, start, end, label=label, vector=vector)
|
||||
return span
|
||||
|
||||
def similarity(self, other):
|
||||
"""Make a semantic similarity estimate. The default estimate is cosine
|
||||
similarity using an average of word vectors.
|
||||
|
|
Loading…
Reference in New Issue
Block a user