mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 02:06:31 +03:00
Add span_id to Span.char_span, update Doc/Span.char_span docs (#12196)
* Add span_id to Span.char_span, update Doc/Span.char_span docs `Span.char_span(id=)` should be removed in the future. * Also use Union[int, str] in Doc docstring
This commit is contained in:
parent
774c10fa39
commit
5f8a398bb9
|
@ -163,6 +163,18 @@ def test_char_span(doc, i_sent, i, j, text):
|
||||||
assert span.text == text
|
assert span.text == text
|
||||||
|
|
||||||
|
|
||||||
|
def test_char_span_attributes(doc):
|
||||||
|
label = "LABEL"
|
||||||
|
kb_id = "KB_ID"
|
||||||
|
span_id = "SPAN_ID"
|
||||||
|
span1 = doc.char_span(20, 45, label=label, kb_id=kb_id, span_id=span_id)
|
||||||
|
span2 = doc[1:].char_span(15, 40, label=label, kb_id=kb_id, span_id=span_id)
|
||||||
|
assert span1.text == span2.text
|
||||||
|
assert span1.label_ == span2.label_ == label
|
||||||
|
assert span1.kb_id_ == span2.kb_id_ == kb_id
|
||||||
|
assert span1.id_ == span2.id_ == span_id
|
||||||
|
|
||||||
|
|
||||||
def test_spans_sent_spans(doc):
|
def test_spans_sent_spans(doc):
|
||||||
sents = list(doc.sents)
|
sents = list(doc.sents)
|
||||||
assert sents[0].start == 0
|
assert sents[0].start == 0
|
||||||
|
|
|
@ -108,6 +108,7 @@ class Doc:
|
||||||
kb_id: Union[int, str] = ...,
|
kb_id: Union[int, str] = ...,
|
||||||
vector: Optional[Floats1d] = ...,
|
vector: Optional[Floats1d] = ...,
|
||||||
alignment_mode: str = ...,
|
alignment_mode: str = ...,
|
||||||
|
span_id: Union[int, str] = ...,
|
||||||
) -> Span: ...
|
) -> Span: ...
|
||||||
def similarity(self, other: Union[Doc, Span, Token, Lexeme]) -> float: ...
|
def similarity(self, other: Union[Doc, Span, Token, Lexeme]) -> float: ...
|
||||||
@property
|
@property
|
||||||
|
|
|
@ -528,9 +528,9 @@ cdef class Doc:
|
||||||
doc (Doc): The parent document.
|
doc (Doc): The parent document.
|
||||||
start_idx (int): The index of the first character of the span.
|
start_idx (int): The index of the first character of the span.
|
||||||
end_idx (int): The index of the first character after the span.
|
end_idx (int): The index of the first character after the span.
|
||||||
label (uint64 or string): A label to attach to the Span, e.g. for
|
label (Union[int, str]): A label to attach to the Span, e.g. for
|
||||||
named entities.
|
named entities.
|
||||||
kb_id (uint64 or string): An ID from a KB to capture the meaning of a
|
kb_id (Union[int, str]): An ID from a KB to capture the meaning of a
|
||||||
named entity.
|
named entity.
|
||||||
vector (ndarray[ndim=1, dtype='float32']): A meaning representation of
|
vector (ndarray[ndim=1, dtype='float32']): A meaning representation of
|
||||||
the span.
|
the span.
|
||||||
|
@ -539,6 +539,7 @@ cdef class Doc:
|
||||||
with token boundaries), "contract" (span of all tokens completely
|
with token boundaries), "contract" (span of all tokens completely
|
||||||
within the character span), "expand" (span of all tokens at least
|
within the character span), "expand" (span of all tokens at least
|
||||||
partially covered by the character span). Defaults to "strict".
|
partially covered by the character span). Defaults to "strict".
|
||||||
|
span_id (Union[int, str]): An identifier to associate with the span.
|
||||||
RETURNS (Span): The newly constructed object.
|
RETURNS (Span): The newly constructed object.
|
||||||
|
|
||||||
DOCS: https://spacy.io/api/doc#char_span
|
DOCS: https://spacy.io/api/doc#char_span
|
||||||
|
|
|
@ -98,7 +98,9 @@ class Span:
|
||||||
label: Union[int, str] = ...,
|
label: Union[int, str] = ...,
|
||||||
kb_id: Union[int, str] = ...,
|
kb_id: Union[int, str] = ...,
|
||||||
vector: Optional[Floats1d] = ...,
|
vector: Optional[Floats1d] = ...,
|
||||||
|
id: Union[int, str] = ...,
|
||||||
alignment_mode: str = ...,
|
alignment_mode: str = ...,
|
||||||
|
span_id: Union[int, str] = ...,
|
||||||
) -> Span: ...
|
) -> Span: ...
|
||||||
@property
|
@property
|
||||||
def conjuncts(self) -> Tuple[Token]: ...
|
def conjuncts(self) -> Tuple[Token]: ...
|
||||||
|
|
|
@ -639,26 +639,28 @@ cdef class Span:
|
||||||
else:
|
else:
|
||||||
return self.doc[root]
|
return self.doc[root]
|
||||||
|
|
||||||
def char_span(self, int start_idx, int end_idx, label=0, kb_id=0, vector=None, id=0, alignment_mode="strict"):
|
def char_span(self, int start_idx, int end_idx, label=0, kb_id=0, vector=None, id=0, alignment_mode="strict", span_id=0):
|
||||||
"""Create a `Span` object from the slice `span.text[start : end]`.
|
"""Create a `Span` object from the slice `span.text[start : end]`.
|
||||||
|
|
||||||
start (int): The index of the first character of the span.
|
start (int): The index of the first character of the span.
|
||||||
end (int): The index of the first character after the span.
|
end (int): The index of the first character after the span.
|
||||||
label (uint64 or string): A label to attach to the Span, e.g. for
|
label (Union[int, str]): A label to attach to the Span, e.g. for
|
||||||
named entities.
|
named entities.
|
||||||
kb_id (uint64 or string): An ID from a KB to capture the meaning of a named entity.
|
kb_id (Union[int, str]): An ID from a KB to capture the meaning of a named entity.
|
||||||
vector (ndarray[ndim=1, dtype='float32']): A meaning representation of
|
vector (ndarray[ndim=1, dtype='float32']): A meaning representation of
|
||||||
the span.
|
the span.
|
||||||
|
id (Union[int, str]): Unused.
|
||||||
alignment_mode (str): How character indices are aligned to token
|
alignment_mode (str): How character indices are aligned to token
|
||||||
boundaries. Options: "strict" (character indices must be aligned
|
boundaries. Options: "strict" (character indices must be aligned
|
||||||
with token boundaries), "contract" (span of all tokens completely
|
with token boundaries), "contract" (span of all tokens completely
|
||||||
within the character span), "expand" (span of all tokens at least
|
within the character span), "expand" (span of all tokens at least
|
||||||
partially covered by the character span). Defaults to "strict".
|
partially covered by the character span). Defaults to "strict".
|
||||||
|
span_id (Union[int, str]): An identifier to associate with the span.
|
||||||
RETURNS (Span): The newly constructed object.
|
RETURNS (Span): The newly constructed object.
|
||||||
"""
|
"""
|
||||||
start_idx += self.c.start_char
|
start_idx += self.c.start_char
|
||||||
end_idx += self.c.start_char
|
end_idx += self.c.start_char
|
||||||
return self.doc.char_span(start_idx, end_idx, label=label, kb_id=kb_id, vector=vector, alignment_mode=alignment_mode)
|
return self.doc.char_span(start_idx, end_idx, label=label, kb_id=kb_id, vector=vector, alignment_mode=alignment_mode, span_id=span_id)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def conjuncts(self):
|
def conjuncts(self):
|
||||||
|
|
|
@ -209,15 +209,16 @@ alignment mode `"strict".
|
||||||
> assert span.text == "New York"
|
> assert span.text == "New York"
|
||||||
> ```
|
> ```
|
||||||
|
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
| ---------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
| ---------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
| `start` | The index of the first character of the span. ~~int~~ |
|
| `start` | The index of the first character of the span. ~~int~~ |
|
||||||
| `end` | The index of the first character after the span. ~~int~~ |
|
| `end` | The index of the first character after the span. ~~int~~ |
|
||||||
| `label` | A label to attach to the span, e.g. for named entities. ~~Union[int, str]~~ |
|
| `label` | A label to attach to the span, e.g. for named entities. ~~Union[int, str]~~ |
|
||||||
| `kb_id` | An ID from a knowledge base to capture the meaning of a named entity. ~~Union[int, str]~~ |
|
| `kb_id` | An ID from a knowledge base to capture the meaning of a named entity. ~~Union[int, str]~~ |
|
||||||
| `vector` | A meaning representation of the span. ~~numpy.ndarray[ndim=1, dtype=float32]~~ |
|
| `vector` | A meaning representation of the span. ~~numpy.ndarray[ndim=1, dtype=float32]~~ |
|
||||||
| `alignment_mode` | How character indices snap to token boundaries. Options: `"strict"` (no snapping), `"contract"` (span of all tokens completely within the character span), `"expand"` (span of all tokens at least partially covered by the character span). Defaults to `"strict"`. ~~str~~ |
|
| `alignment_mode` | How character indices snap to token boundaries. Options: `"strict"` (no snapping), `"contract"` (span of all tokens completely within the character span), `"expand"` (span of all tokens at least partially covered by the character span). Defaults to `"strict"`. ~~str~~ |
|
||||||
| **RETURNS** | The newly constructed object or `None`. ~~Optional[Span]~~ |
|
| `span_id` <Tag variant="new">3.3.1</Tag> | An identifier to associate with the span. ~~Union[int, str]~~ |
|
||||||
|
| **RETURNS** | The newly constructed object or `None`. ~~Optional[Span]~~ |
|
||||||
|
|
||||||
## Doc.set_ents {id="set_ents",tag="method",version="3"}
|
## Doc.set_ents {id="set_ents",tag="method",version="3"}
|
||||||
|
|
||||||
|
|
|
@ -193,7 +193,9 @@ the character indices don't map to a valid span.
|
||||||
| `label` | A label to attach to the span, e.g. for named entities. ~~Union[int, str]~~ |
|
| `label` | A label to attach to the span, e.g. for named entities. ~~Union[int, str]~~ |
|
||||||
| `kb_id` | An ID from a knowledge base to capture the meaning of a named entity. ~~Union[int, str]~~ |
|
| `kb_id` | An ID from a knowledge base to capture the meaning of a named entity. ~~Union[int, str]~~ |
|
||||||
| `vector` | A meaning representation of the span. ~~numpy.ndarray[ndim=1, dtype=float32]~~ |
|
| `vector` | A meaning representation of the span. ~~numpy.ndarray[ndim=1, dtype=float32]~~ |
|
||||||
|
| `id` | Unused. ~~Union[int, str]~~ |
|
||||||
| `alignment_mode` <Tag variant="new">3.5.1</Tag> | How character indices snap to token boundaries. Options: `"strict"` (no snapping), `"contract"` (span of all tokens completely within the character span), `"expand"` (span of all tokens at least partially covered by the character span). Defaults to `"strict"`. ~~str~~ |
|
| `alignment_mode` <Tag variant="new">3.5.1</Tag> | How character indices snap to token boundaries. Options: `"strict"` (no snapping), `"contract"` (span of all tokens completely within the character span), `"expand"` (span of all tokens at least partially covered by the character span). Defaults to `"strict"`. ~~str~~ |
|
||||||
|
| `span_id` <Tag variant="new">3.5.1</Tag> | An identifier to associate with the span. ~~Union[int, str]~~ |
|
||||||
| **RETURNS** | The newly constructed object or `None`. ~~Optional[Span]~~ |
|
| **RETURNS** | The newly constructed object or `None`. ~~Optional[Span]~~ |
|
||||||
|
|
||||||
## Span.similarity {id="similarity",tag="method",model="vectors"}
|
## Span.similarity {id="similarity",tag="method",model="vectors"}
|
||||||
|
|
Loading…
Reference in New Issue
Block a user