feat: add example stubs (3) (#12801)

* feat: add example stubs * fix: add required annotations * fix: mypy issues * fix: use Py36-compatible Protocol * Minor reformatting * adding further type specifications and removing internal methods * black formatting * widen type to iterable * add private methods that are being used by the built-in convertors * revert changes to corpus.py * fixes * fixes * fix typing of PlainTextCorpus --------- Co-authored-by: Basile Dura <basile@bdura.me> Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>
2025-12-23 01:53:17 +03:00 · 2023-08-02 08:15:12 +02:00 · 2023-08-02 08:15:12 +02:00 · 0737443096
commit 0737443096
parent 0fe43f40f1
3 changed files with 74 additions and 2 deletions
--- a/spacy/tokens/doc.pyi
+++ b/spacy/tokens/doc.pyi
@ -8,6 +8,7 @@ from typing import (
    List,
    Optional,
    Protocol,
    Sequence,
    Tuple,
    Union,
    overload,
@ -134,7 +135,12 @@ class Doc:
    def text(self) -> str: ...
    # Property stub typed as returning a str; no setter is declared for it
    # in the visible part of this hunk.
    @property
    def text_with_ws(self) -> str: ...
-    ents: Tuple[Span]
+    # Ideally the getter would output Tuple[Span]
    # see https://github.com/python/mypy/issues/3004
    # Read access to ``ents``: declared as Sequence[Span], the same type the
    # setter below accepts.
    @property
    def ents(self) -> Sequence[Span]: ...
    # Write access to ``ents``: takes any Sequence[Span] and returns nothing.
    @ents.setter
    def ents(self, value: Sequence[Span]) -> None: ...
    def set_ents(
        self,
        entities: List[Span],
--- a/spacy/training/corpus.py
+++ b/spacy/training/corpus.py
@ -63,7 +63,7 @@ def create_plain_text_reader(
    path: Optional[Path],
    min_length: int = 0,
    max_length: int = 0,
-) -> Callable[["Language"], Iterable[Doc]]:
+) -> Callable[["Language"], Iterable[Example]]:
    """Iterate Example objects from a file or directory of plain text
    UTF-8 files with one line per doc.
--- a/spacy/training/example.pyi
+++ b/spacy/training/example.pyi
@ -0,0 +1,66 @@
 from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple
 from ..tokens import Doc, Span
 from ..vocab import Vocab
 from .alignment import Alignment
 # Stub signature: takes a Vocab and two string-keyed annotation dicts and
 # returns a Doc.
 # NOTE(review): judging by the parameter names, `tok_annot` holds token-level
 # and `doc_annot` holds doc-level annotations — confirm against the
 # implementation.
 def annotations_to_doc(
    vocab: Vocab,
    tok_annot: Dict[str, Any],
    doc_annot: Dict[str, Any],
 ) -> Doc: ...
 # Stub signature: takes an iterable of Example objects plus a `method`
 # string and returns nothing.
 # NOTE(review): `method` presumably names the calling method for error
 # reporting — confirm against the implementation.
 def validate_examples(
    examples: Iterable[Example],
    method: str,
 ) -> None: ...
 # Stub signature: takes a zero-argument callable that yields an iterable of
 # Example objects, plus a `method` string.
 # The return type is annotated as None so the signature is fully typed, in
 # line with the sibling validate_examples stub in this file.
 # NOTE(review): `method` presumably names the calling method for error
 # reporting — confirm against the implementation.
 def validate_get_examples(
    get_examples: Callable[[], Iterable[Example]],
    method: str,
 ) -> None: ...
 class Example:
    # Type stub for Example. Per __init__, an Example is built from a
    # `predicted` Doc and a `reference` Doc, plus an optional keyword-only
    # Alignment.
    # NOTE(review): `x`/`y` presumably mirror `predicted`/`reference` —
    # confirm against the implementation.
    x: Doc
    y: Doc
    def __init__(
        self,
        predicted: Doc,
        reference: Doc,
        *,
        alignment: Optional[Alignment] = None,
    ) -> None: ...
    def __len__(self) -> int: ...
    # `predicted` and `reference` are read/write properties typed as Doc.
    @property
    def predicted(self) -> Doc: ...
    @predicted.setter
    def predicted(self, doc: Doc) -> None: ...
    @property
    def reference(self) -> Doc: ...
    @reference.setter
    def reference(self, doc: Doc) -> None: ...
    def copy(self) -> Example: ...
    # Alternate constructor: takes the predicted Doc and a string-keyed dict.
    @classmethod
    def from_dict(cls, predicted: Doc, example_dict: Dict[str, Any]) -> Example: ...
    # Read-only property (no setter declared) typed as Alignment.
    @property
    def alignment(self) -> Alignment: ...
    # The boolean flags below are annotated as `bool` to match their defaults.
    # NOTE(review): the return types of get_aligned, get_aligned_parse and
    # get_aligned_sent_starts are not visible from this stub and are left
    # unannotated — fill them in from the implementation.
    def get_aligned(self, field: str, as_string: bool = False): ...
    def get_aligned_parse(self, projectivize: bool = True): ...
    def get_aligned_sent_starts(self): ...
    # Both span-alignment helpers accept any iterable of Span objects and
    # return a list of Span objects.
    def get_aligned_spans_x2y(
        self, x_spans: Iterable[Span], allow_overlap: bool = False
    ) -> List[Span]: ...
    def get_aligned_spans_y2x(
        self, y_spans: Iterable[Span], allow_overlap: bool = False
    ) -> List[Span]: ...
    def get_aligned_ents_and_ner(self) -> Tuple[List[Span], List[str]]: ...
    def get_aligned_ner(self) -> List[str]: ...
    def get_matching_ents(self, check_label: bool = True) -> List[Span]: ...
    def to_dict(self) -> Dict[str, Any]: ...
    def split_sents(self) -> List[Example]: ...
    @property
    def text(self) -> str: ...
    def __str__(self) -> str: ...
    def __repr__(self) -> str: ...
 # Private helper exposed in the stub; parameter and return types are not
 # declared here.
 # NOTE(review): `example_dict` is presumably the same Dict[str, Any] shape
 # accepted by Example.from_dict — confirm before annotating.
 def _parse_example_dict_data(example_dict): ...
 # Private helper exposed in the stub; parameter and return types are not
 # declared here.
 # NOTE(review): `example_dict` is presumably the same Dict[str, Any] shape
 # accepted by Example.from_dict — confirm before annotating.
 def _fix_legacy_dict_data(example_dict): ...