diff --git a/spacy/tokens/doc.pyi b/spacy/tokens/doc.pyi index 93cd8de05..6ff61c05d 100644 --- a/spacy/tokens/doc.pyi +++ b/spacy/tokens/doc.pyi @@ -105,6 +105,7 @@ class Doc: start_idx: int, end_idx: int, label: Union[int, str] = ..., + *, kb_id: Union[int, str] = ..., vector: Optional[Floats1d] = ..., alignment_mode: str = ..., @@ -127,7 +128,7 @@ class Doc: blocked: Optional[List[Span]] = ..., missing: Optional[List[Span]] = ..., outside: Optional[List[Span]] = ..., - default: str = ... + default: str = ..., ) -> None: ... @property def noun_chunks(self) -> Iterator[Span]: ... diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx index 2eca1aafd..f4836dd14 100644 --- a/spacy/tokens/doc.pyx +++ b/spacy/tokens/doc.pyx @@ -520,7 +520,7 @@ cdef class Doc: def doc(self): return self - def char_span(self, int start_idx, int end_idx, label=0, kb_id=0, vector=None, alignment_mode="strict", span_id=0): + def char_span(self, int start_idx, int end_idx, label=0, *, kb_id=0, vector=None, alignment_mode="strict", span_id=0): """Create a `Span` object from the slice `doc.text[start_idx : end_idx]`. Returns None if no valid `Span` can be created. 
@@ -1605,7 +1605,7 @@ cdef class Doc: for span_group in doc_json.get("spans", {}): spans = [] for span in doc_json["spans"][span_group]: - char_span = self.char_span(span["start"], span["end"], span["label"], span["kb_id"]) + char_span = self.char_span(span["start"], span["end"], span["label"], kb_id=span["kb_id"]) if char_span is None: raise ValueError(Errors.E1039.format(obj="span", start=span["start"], end=span["end"])) spans.append(char_span) diff --git a/spacy/tokens/span.pyi b/spacy/tokens/span.pyi index 549990c5e..88cb90a17 100644 --- a/spacy/tokens/span.pyi +++ b/spacy/tokens/span.pyi @@ -94,6 +94,7 @@ class Span: start_idx: int, end_idx: int, label: Union[int, str] = ..., + *, kb_id: Union[int, str] = ..., vector: Optional[Floats1d] = ..., alignment_mode: str = ..., diff --git a/spacy/tokens/span.pyx b/spacy/tokens/span.pyx index 4990cb5f7..25dbfecdf 100644 --- a/spacy/tokens/span.pyx +++ b/spacy/tokens/span.pyx @@ -666,11 +666,11 @@ cdef class Span: else: return self.doc[root] - def char_span(self, int start_idx, int end_idx, label=0, kb_id=0, vector=None, alignment_mode="strict", span_id=0): + def char_span(self, int start_idx, int end_idx, label=0, *, kb_id=0, vector=None, alignment_mode="strict", span_id=0): """Create a `Span` object from the slice `span.text[start : end]`. - start (int): The index of the first character of the span. - end (int): The index of the first character after the span. + start_idx (int): The index of the first character of the span. + end_idx (int): The index of the first character after the span. label (Union[int, str]): A label to attach to the Span, e.g. for named entities. kb_id (Union[int, str]): An ID from a KB to capture the meaning of a named entity. diff --git a/website/docs/api/doc.mdx b/website/docs/api/doc.mdx index 1a3f6179f..96fe2c35a 100644 --- a/website/docs/api/doc.mdx +++ b/website/docs/api/doc.mdx @@ -214,6 +214,7 @@ alignment mode `"strict". | `start` | The index of the first character of the span. 
~~int~~ | | `end` | The index of the last character after the span. ~~int~~ | | `label` | A label to attach to the span, e.g. for named entities. ~~Union[int, str]~~ | +| _keyword-only_ | | | `kb_id` | An ID from a knowledge base to capture the meaning of a named entity. ~~Union[int, str]~~ | | `vector` | A meaning representation of the span. ~~numpy.ndarray[ndim=1, dtype=float32]~~ | | `alignment_mode` | How character indices snap to token boundaries. Options: `"strict"` (no snapping), `"contract"` (span of all tokens completely within the character span), `"expand"` (span of all tokens at least partially covered by the character span). Defaults to `"strict"`. ~~str~~ | diff --git a/website/docs/api/span.mdx b/website/docs/api/span.mdx index 7e7042866..832501d37 100644 --- a/website/docs/api/span.mdx +++ b/website/docs/api/span.mdx @@ -188,9 +188,10 @@ the character indices don't map to a valid span. | Name | Description | | ----------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `start` | The index of the first character of the span. ~~int~~ | -| `end` | The index of the last character after the span. ~~int~~ | +| `start_idx` | The index of the first character of the span. ~~int~~ | +| `end_idx` | The index of the first character after the span. ~~int~~ | | `label` | A label to attach to the span, e.g. for named entities. ~~Union[int, str]~~ | +| _keyword-only_ | | | `kb_id` | An ID from a knowledge base to capture the meaning of a named entity. ~~Union[int, str]~~ | | `vector` | A meaning representation of the span. ~~numpy.ndarray[ndim=1, dtype=float32]~~ | | `alignment_mode` 3.5.1 | How character indices snap to token boundaries. 
Options: `"strict"` (no snapping), `"contract"` (span of all tokens completely within the character span), `"expand"` (span of all tokens at least partially covered by the character span). Defaults to `"strict"`. ~~str~~ |