Make Span.char_span optional args keyword-only (#12257)

* Make Span.char_span optional args keyword-only
* Make kb_id and following kw-only
* Format
2025-12-24 02:23:19 +03:00 · 2023-02-15 12:34:33 +01:00 · 2023-02-15 12:34:33 +01:00 · b95123060a
commit b95123060a
parent cbc2ae933e
6 changed files with 12 additions and 8 deletions
--- a/spacy/tokens/doc.pyi
+++ b/spacy/tokens/doc.pyi
@ -105,6 +105,7 @@ class Doc:
        start_idx: int,
        end_idx: int,
        label: Union[int, str] = ...,
        *,
        kb_id: Union[int, str] = ...,
        vector: Optional[Floats1d] = ...,
        alignment_mode: str = ...,
@ -127,7 +128,7 @@ class Doc:
        blocked: Optional[List[Span]] = ...,
        missing: Optional[List[Span]] = ...,
        outside: Optional[List[Span]] = ...,
-        default: str = ...
+        default: str = ...,
    ) -> None: ...
    @property
    def noun_chunks(self) -> Iterator[Span]: ...
--- a/spacy/tokens/doc.pyx
+++ b/spacy/tokens/doc.pyx
@ -520,7 +520,7 @@ cdef class Doc:
    def doc(self):
        return self
-    def char_span(self, int start_idx, int end_idx, label=0, kb_id=0, vector=None, alignment_mode="strict", span_id=0):
+    def char_span(self, int start_idx, int end_idx, label=0, *, kb_id=0, vector=None, alignment_mode="strict", span_id=0):
        """Create a `Span` object from the slice
        `doc.text[start_idx : end_idx]`. Returns None if no valid `Span` can be
        created.
@ -1605,7 +1605,7 @@ cdef class Doc:
        for span_group in doc_json.get("spans", {}):
            spans = []
            for span in doc_json["spans"][span_group]:
-                char_span = self.char_span(span["start"], span["end"], span["label"], span["kb_id"])
+                char_span = self.char_span(span["start"], span["end"], span["label"], kb_id=span["kb_id"])
                if char_span is None:
                    raise ValueError(Errors.E1039.format(obj="span", start=span["start"], end=span["end"]))
                spans.append(char_span)
--- a/spacy/tokens/span.pyi
+++ b/spacy/tokens/span.pyi
@ -94,6 +94,7 @@ class Span:
        start_idx: int,
        end_idx: int,
        label: Union[int, str] = ...,
        *,
        kb_id: Union[int, str] = ...,
        vector: Optional[Floats1d] = ...,
        alignment_mode: str = ...,
--- a/spacy/tokens/span.pyx
+++ b/spacy/tokens/span.pyx
@ -666,11 +666,11 @@ cdef class Span:
        else:
            return self.doc[root]
-    def char_span(self, int start_idx, int end_idx, label=0, kb_id=0, vector=None, alignment_mode="strict", span_id=0):
+    def char_span(self, int start_idx, int end_idx, label=0, *, kb_id=0, vector=None, alignment_mode="strict", span_id=0):
        """Create a `Span` object from the slice `span.text[start : end]`.
-        start (int): The index of the first character of the span.
+        start_idx (int): The index of the first character of the span.
-        end (int): The index of the first character after the span.
+        end_idx (int): The index of the first character after the span.
        label (Union[int, str]): A label to attach to the Span, e.g. for
            named entities.
        kb_id (Union[int, str]):  An ID from a KB to capture the meaning of a named entity.
--- a/website/docs/api/doc.mdx
+++ b/website/docs/api/doc.mdx
@ -214,6 +214,7 @@ alignment mode `"strict".
 | `start_idx`                              | The index of the first character of the span. ~~int~~                                                                                                                                                                                                                        |
 | `end_idx`                                | The index of the first character after the span. ~~int~~                                                                                                                                                                                                                     |
 | `label`                                  | A label to attach to the span, e.g. for named entities. ~~Union[int, str]~~                                                                                                                                                                                                  |
 | _keyword-only_                           |                                                                                                                                                                                                                                                                              |
 | `kb_id`                                  | An ID from a knowledge base to capture the meaning of a named entity. ~~Union[int, str]~~                                                                                                                                                                                    |
 | `vector`                                 | A meaning representation of the span. ~~numpy.ndarray[ndim=1, dtype=float32]~~                                                                                                                                                                                               |
 | `alignment_mode`                         | How character indices snap to token boundaries. Options: `"strict"` (no snapping), `"contract"` (span of all tokens completely within the character span), `"expand"` (span of all tokens at least partially covered by the character span). Defaults to `"strict"`. ~~str~~ |
--- a/website/docs/api/span.mdx
+++ b/website/docs/api/span.mdx
@ -188,9 +188,10 @@ the character indices don't map to a valid span.
 | Name                                            | Description                                                                                                                                                                                                                                                                  |
 | ----------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| `start`                                         | The index of the first character of the span. ~~int~~                                                                                                                                                                                                                        |
+| `start_idx`                                     | The index of the first character of the span. ~~int~~                                                                                                                                                                                                                        |
-| `end`                                           | The index of the last character after the span. ~~int~~                                                                                                                                                                                                                      |
+| `end_idx`                                       | The index of the last character after the span. ~~int~~                                                                                                                                                                                                                      |
 | `label`                                         | A label to attach to the span, e.g. for named entities. ~~Union[int, str]~~                                                                                                                                                                                                  |
 | _keyword-only_                                  |                                                                                                                                                                                                                                                                              |
 | `kb_id`                                         | An ID from a knowledge base to capture the meaning of a named entity. ~~Union[int, str]~~                                                                                                                                                                                    |
 | `vector`                                        | A meaning representation of the span. ~~numpy.ndarray[ndim=1, dtype=float32]~~                                                                                                                                                                                               |
 | `alignment_mode` <Tag variant="new">3.5.1</Tag> | How character indices snap to token boundaries. Options: `"strict"` (no snapping), `"contract"` (span of all tokens completely within the character span), `"expand"` (span of all tokens at least partially covered by the character span). Defaults to `"strict"`. ~~str~~ |