mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-11 17:56:30 +03:00
add char_span to Span (#4793)
This commit is contained in:
parent
f9b541f9ef
commit
1707e77c5e
|
@ -32,6 +32,24 @@ def doc_not_parsed(en_tokenizer):
|
|||
return doc
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "i_sent,i,j,text",
    [
        (0, 0, len("This is a"), "This is a"),
        (1, 0, len("This is another"), "This is another"),
        (2, len("And "), len("And ") + len("a third"), "a third"),
        (0, 1, 2, None),
    ],
)
def test_char_span(doc, i_sent, i, j, text):
    """Check `Span.char_span` on the sentence at index `i_sent` of `doc`.

    `i` and `j` are passed to `char_span` as the start and end character
    offsets. When `text` is truthy it is the expected text of the
    resulting span; when it is None the call is expected to produce a
    falsy result.
    """
    sentence = list(doc.sents)[i_sent]
    result = sentence.char_span(i, j)
    if text:
        assert result.text == text
    else:
        # No expected text: the offsets should not yield a usable span.
        assert not result
|
||||
|
||||
|
||||
def test_spans_sent_spans(doc):
|
||||
sents = list(doc.sents)
|
||||
assert sents[0].start == 0
|
||||
|
|
|
@ -584,6 +584,22 @@ cdef class Span:
|
|||
else:
|
||||
return self.doc[root]
|
||||
|
||||
def char_span(self, int start_idx, int end_idx, label=0, kb_id=0, vector=None):
    """Create a `Span` object from the slice `span.text[start_idx : end_idx]`.

    The character offsets are relative to the start of this span, not to
    the start of the parent document.

    start_idx (int): The index of the first character of the new span,
        counted from the first character of this span.
    end_idx (int): The index of the first character after the new span,
        counted from the first character of this span.
    label (uint64 or string): A label to attach to the Span, e.g. for
        named entities.
    kb_id (uint64 or string): An ID from a KB to capture the meaning of a named entity.
    vector (ndarray[ndim=1, dtype='float32']): A meaning representation of
        the span.
    RETURNS (Span): The newly constructed object, exactly as returned by
        `Doc.char_span` (presumably None when the offsets do not map onto
        a span -- confirm against `Doc.char_span`).
    """
    # Shift the span-relative offsets into document-level character offsets.
    start_idx += self.start_char
    end_idx += self.start_char
    # Forward the optional arguments; previously they were accepted and
    # documented but silently dropped, so the result never carried the
    # caller's label, kb_id or vector.
    return self.doc.char_span(
        start_idx, end_idx, label=label, kb_id=kb_id, vector=vector
    )
|
||||
|
||||
@property
|
||||
def conjuncts(self):
|
||||
"""Tokens that are conjoined to the span's root.
|
||||
|
|
Loading…
Reference in New Issue
Block a user