mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-27 10:26:35 +03:00
Make Span.char_span optional args keyword-only (#12257)
* Make Span.char_span optional args keyword-only
* Make kb_id and following kw-only
* Format
This commit is contained in:
parent
cbc2ae933e
commit
b95123060a
|
@ -105,6 +105,7 @@ class Doc:
|
||||||
start_idx: int,
|
start_idx: int,
|
||||||
end_idx: int,
|
end_idx: int,
|
||||||
label: Union[int, str] = ...,
|
label: Union[int, str] = ...,
|
||||||
|
*,
|
||||||
kb_id: Union[int, str] = ...,
|
kb_id: Union[int, str] = ...,
|
||||||
vector: Optional[Floats1d] = ...,
|
vector: Optional[Floats1d] = ...,
|
||||||
alignment_mode: str = ...,
|
alignment_mode: str = ...,
|
||||||
|
@ -127,7 +128,7 @@ class Doc:
|
||||||
blocked: Optional[List[Span]] = ...,
|
blocked: Optional[List[Span]] = ...,
|
||||||
missing: Optional[List[Span]] = ...,
|
missing: Optional[List[Span]] = ...,
|
||||||
outside: Optional[List[Span]] = ...,
|
outside: Optional[List[Span]] = ...,
|
||||||
default: str = ...
|
default: str = ...,
|
||||||
) -> None: ...
|
) -> None: ...
|
||||||
@property
|
@property
|
||||||
def noun_chunks(self) -> Iterator[Span]: ...
|
def noun_chunks(self) -> Iterator[Span]: ...
|
||||||
|
|
|
@ -520,7 +520,7 @@ cdef class Doc:
|
||||||
def doc(self):
|
def doc(self):
|
||||||
return self
|
return self
|
||||||
|
|
||||||
def char_span(self, int start_idx, int end_idx, label=0, kb_id=0, vector=None, alignment_mode="strict", span_id=0):
|
def char_span(self, int start_idx, int end_idx, label=0, *, kb_id=0, vector=None, alignment_mode="strict", span_id=0):
|
||||||
"""Create a `Span` object from the slice
|
"""Create a `Span` object from the slice
|
||||||
`doc.text[start_idx : end_idx]`. Returns None if no valid `Span` can be
|
`doc.text[start_idx : end_idx]`. Returns None if no valid `Span` can be
|
||||||
created.
|
created.
|
||||||
|
@ -1605,7 +1605,7 @@ cdef class Doc:
|
||||||
for span_group in doc_json.get("spans", {}):
|
for span_group in doc_json.get("spans", {}):
|
||||||
spans = []
|
spans = []
|
||||||
for span in doc_json["spans"][span_group]:
|
for span in doc_json["spans"][span_group]:
|
||||||
char_span = self.char_span(span["start"], span["end"], span["label"], span["kb_id"])
|
char_span = self.char_span(span["start"], span["end"], span["label"], kb_id=span["kb_id"])
|
||||||
if char_span is None:
|
if char_span is None:
|
||||||
raise ValueError(Errors.E1039.format(obj="span", start=span["start"], end=span["end"]))
|
raise ValueError(Errors.E1039.format(obj="span", start=span["start"], end=span["end"]))
|
||||||
spans.append(char_span)
|
spans.append(char_span)
|
||||||
|
|
|
@ -94,6 +94,7 @@ class Span:
|
||||||
start_idx: int,
|
start_idx: int,
|
||||||
end_idx: int,
|
end_idx: int,
|
||||||
label: Union[int, str] = ...,
|
label: Union[int, str] = ...,
|
||||||
|
*,
|
||||||
kb_id: Union[int, str] = ...,
|
kb_id: Union[int, str] = ...,
|
||||||
vector: Optional[Floats1d] = ...,
|
vector: Optional[Floats1d] = ...,
|
||||||
alignment_mode: str = ...,
|
alignment_mode: str = ...,
|
||||||
|
|
|
@ -666,11 +666,11 @@ cdef class Span:
|
||||||
else:
|
else:
|
||||||
return self.doc[root]
|
return self.doc[root]
|
||||||
|
|
||||||
def char_span(self, int start_idx, int end_idx, label=0, kb_id=0, vector=None, alignment_mode="strict", span_id=0):
|
def char_span(self, int start_idx, int end_idx, label=0, *, kb_id=0, vector=None, alignment_mode="strict", span_id=0):
|
||||||
"""Create a `Span` object from the slice `span.text[start : end]`.
|
"""Create a `Span` object from the slice `span.text[start : end]`.
|
||||||
|
|
||||||
start (int): The index of the first character of the span.
|
start_idx (int): The index of the first character of the span.
|
||||||
end (int): The index of the first character after the span.
|
end_idx (int): The index of the first character after the span.
|
||||||
label (Union[int, str]): A label to attach to the Span, e.g. for
|
label (Union[int, str]): A label to attach to the Span, e.g. for
|
||||||
named entities.
|
named entities.
|
||||||
kb_id (Union[int, str]): An ID from a KB to capture the meaning of a named entity.
|
kb_id (Union[int, str]): An ID from a KB to capture the meaning of a named entity.
|
||||||
|
|
|
@ -214,6 +214,7 @@ alignment mode `"strict"`.
|
||||||
| `start` | The index of the first character of the span. ~~int~~ |
|
| `start` | The index of the first character of the span. ~~int~~ |
|
||||||
| `end` | The index of the first character after the span. ~~int~~ |
|
| `end` | The index of the first character after the span. ~~int~~ |
|
||||||
| `label` | A label to attach to the span, e.g. for named entities. ~~Union[int, str]~~ |
|
| `label` | A label to attach to the span, e.g. for named entities. ~~Union[int, str]~~ |
|
||||||
|
| _keyword-only_ | |
|
||||||
| `kb_id` | An ID from a knowledge base to capture the meaning of a named entity. ~~Union[int, str]~~ |
|
| `kb_id` | An ID from a knowledge base to capture the meaning of a named entity. ~~Union[int, str]~~ |
|
||||||
| `vector` | A meaning representation of the span. ~~numpy.ndarray[ndim=1, dtype=float32]~~ |
|
| `vector` | A meaning representation of the span. ~~numpy.ndarray[ndim=1, dtype=float32]~~ |
|
||||||
| `alignment_mode` | How character indices snap to token boundaries. Options: `"strict"` (no snapping), `"contract"` (span of all tokens completely within the character span), `"expand"` (span of all tokens at least partially covered by the character span). Defaults to `"strict"`. ~~str~~ |
|
| `alignment_mode` | How character indices snap to token boundaries. Options: `"strict"` (no snapping), `"contract"` (span of all tokens completely within the character span), `"expand"` (span of all tokens at least partially covered by the character span). Defaults to `"strict"`. ~~str~~ |
|
||||||
|
|
|
@ -188,9 +188,10 @@ the character indices don't map to a valid span.
|
||||||
|
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
| ----------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
| ----------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
| `start` | The index of the first character of the span. ~~int~~ |
|
| `start_idx` | The index of the first character of the span. ~~int~~ |
|
||||||
| `end` | The index of the first character after the span. ~~int~~ |
|
| `end_idx` | The index of the first character after the span. ~~int~~ |
|
||||||
| `label` | A label to attach to the span, e.g. for named entities. ~~Union[int, str]~~ |
|
| `label` | A label to attach to the span, e.g. for named entities. ~~Union[int, str]~~ |
|
||||||
|
| _keyword-only_ | |
|
||||||
| `kb_id` | An ID from a knowledge base to capture the meaning of a named entity. ~~Union[int, str]~~ |
|
| `kb_id` | An ID from a knowledge base to capture the meaning of a named entity. ~~Union[int, str]~~ |
|
||||||
| `vector` | A meaning representation of the span. ~~numpy.ndarray[ndim=1, dtype=float32]~~ |
|
| `vector` | A meaning representation of the span. ~~numpy.ndarray[ndim=1, dtype=float32]~~ |
|
||||||
| `alignment_mode` <Tag variant="new">3.5.1</Tag> | How character indices snap to token boundaries. Options: `"strict"` (no snapping), `"contract"` (span of all tokens completely within the character span), `"expand"` (span of all tokens at least partially covered by the character span). Defaults to `"strict"`. ~~str~~ |
|
| `alignment_mode` <Tag variant="new">3.5.1</Tag> | How character indices snap to token boundaries. Options: `"strict"` (no snapping), `"contract"` (span of all tokens completely within the character span), `"expand"` (span of all tokens at least partially covered by the character span). Defaults to `"strict"`. ~~str~~ |
|
||||||
|
|
Loading…
Reference in New Issue
Block a user