Mirror of https://github.com/explosion/spaCy.git, synced 2025-10-30 23:47:31 +03:00.

Revert "feat: add example stubs (#12679)"

This reverts commit 30bb34533a.

This commit is contained in:
    parent: 30bb34533a
    commit: d26e4e0849
				|  | @ -8,7 +8,6 @@ from typing import ( | |||
|     List, | ||||
|     Optional, | ||||
|     Protocol, | ||||
|     Sequence, | ||||
|     Tuple, | ||||
|     Union, | ||||
|     overload, | ||||
|  | @ -135,12 +134,7 @@ class Doc: | |||
|     def text(self) -> str: ... | ||||
|     @property | ||||
|     def text_with_ws(self) -> str: ... | ||||
|     # Ideally the getter would output Tuple[Span] | ||||
|     # see https://github.com/python/mypy/issues/3004 | ||||
|     @property | ||||
|     def ents(self) -> Sequence[Span]: ... | ||||
|     @ents.setter | ||||
|     def ents(self, value: Sequence[Span]) -> None: ... | ||||
|     ents: Tuple[Span] | ||||
|     def set_ents( | ||||
|         self, | ||||
|         entities: List[Span], | ||||
|  |  | |||
|  | @ -6,7 +6,6 @@ from typing import TYPE_CHECKING, Callable, Iterable, Iterator, List, Optional, | |||
| import srsly | ||||
| 
 | ||||
| from .. import util | ||||
| from ..compat import Protocol | ||||
| from ..errors import Errors, Warnings | ||||
| from ..tokens import Doc, DocBin | ||||
| from ..vocab import Vocab | ||||
|  | @ -20,11 +19,6 @@ if TYPE_CHECKING: | |||
| FILE_TYPE = ".spacy" | ||||
| 
 | ||||
| 
 | ||||
| class ReaderProtocol(Protocol): | ||||
|     def __call__(self, nlp: "Language") -> Iterable[Example]: | ||||
|         pass | ||||
| 
 | ||||
| 
 | ||||
| @util.registry.readers("spacy.Corpus.v1") | ||||
| def create_docbin_reader( | ||||
|     path: Optional[Path], | ||||
|  | @ -32,7 +26,7 @@ def create_docbin_reader( | |||
|     max_length: int = 0, | ||||
|     limit: int = 0, | ||||
|     augmenter: Optional[Callable] = None, | ||||
| ) -> ReaderProtocol: | ||||
| ) -> Callable[["Language"], Iterable[Example]]: | ||||
|     if path is None: | ||||
|         raise ValueError(Errors.E913) | ||||
|     util.logger.debug("Loading corpus from path: %s", path) | ||||
|  | @ -51,7 +45,7 @@ def create_jsonl_reader( | |||
|     min_length: int = 0, | ||||
|     max_length: int = 0, | ||||
|     limit: int = 0, | ||||
| ) -> ReaderProtocol: | ||||
| ) -> Callable[["Language"], Iterable[Example]]: | ||||
|     return JsonlCorpus(path, min_length=min_length, max_length=max_length, limit=limit) | ||||
| 
 | ||||
| 
 | ||||
|  | @ -69,7 +63,7 @@ def create_plain_text_reader( | |||
|     path: Optional[Path], | ||||
|     min_length: int = 0, | ||||
|     max_length: int = 0, | ||||
| ) -> ReaderProtocol: | ||||
| ) -> Callable[["Language"], Iterable[Doc]]: | ||||
|     """Iterate Example objects from a file or directory of plain text | ||||
|     UTF-8 files with one line per doc. | ||||
| 
 | ||||
|  | @ -150,7 +144,7 @@ class Corpus: | |||
|         self.augmenter = augmenter if augmenter is not None else dont_augment | ||||
|         self.shuffle = shuffle | ||||
| 
 | ||||
|     def __call__(self, nlp: "Language") -> Iterable[Example]: | ||||
|     def __call__(self, nlp: "Language") -> Iterator[Example]: | ||||
|         """Yield examples from the data. | ||||
| 
 | ||||
|         nlp (Language): The current nlp object. | ||||
|  | @ -188,7 +182,7 @@ class Corpus: | |||
| 
 | ||||
|     def make_examples( | ||||
|         self, nlp: "Language", reference_docs: Iterable[Doc] | ||||
|     ) -> Iterable[Example]: | ||||
|     ) -> Iterator[Example]: | ||||
|         for reference in reference_docs: | ||||
|             if len(reference) == 0: | ||||
|                 continue | ||||
|  | @ -203,7 +197,7 @@ class Corpus: | |||
| 
 | ||||
|     def make_examples_gold_preproc( | ||||
|         self, nlp: "Language", reference_docs: Iterable[Doc] | ||||
|     ) -> Iterable[Example]: | ||||
|     ) -> Iterator[Example]: | ||||
|         for reference in reference_docs: | ||||
|             if reference.has_annotation("SENT_START"): | ||||
|                 ref_sents = [sent.as_doc() for sent in reference.sents] | ||||
|  | @ -216,7 +210,7 @@ class Corpus: | |||
| 
 | ||||
|     def read_docbin( | ||||
|         self, vocab: Vocab, locs: Iterable[Union[str, Path]] | ||||
|     ) -> Iterable[Doc]: | ||||
|     ) -> Iterator[Doc]: | ||||
|         """Yield training examples as example dicts""" | ||||
|         i = 0 | ||||
|         for loc in locs: | ||||
|  | @ -263,7 +257,7 @@ class JsonlCorpus: | |||
|         self.max_length = max_length | ||||
|         self.limit = limit | ||||
| 
 | ||||
|     def __call__(self, nlp: "Language") -> Iterable[Example]: | ||||
|     def __call__(self, nlp: "Language") -> Iterator[Example]: | ||||
|         """Yield examples from the data. | ||||
| 
 | ||||
|         nlp (Language): The current nlp object. | ||||
|  | @ -313,7 +307,7 @@ class PlainTextCorpus: | |||
|         self.min_length = min_length | ||||
|         self.max_length = max_length | ||||
| 
 | ||||
|     def __call__(self, nlp: "Language") -> Iterable[Example]: | ||||
|     def __call__(self, nlp: "Language") -> Iterator[Example]: | ||||
|         """Yield examples from the data. | ||||
| 
 | ||||
|         nlp (Language): The current nlp object. | ||||
|  |  | |||
|  | @ -1,59 +0,0 @@ | |||
| from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Tuple | ||||
| 
 | ||||
| from ..tokens import Doc, Span | ||||
| from ..vocab import Vocab | ||||
| from .alignment import Alignment | ||||
| 
 | ||||
| def annotations_to_doc( | ||||
|     vocab: Vocab, | ||||
|     tok_annot: Dict[str, Any], | ||||
|     doc_annot: Dict[str, Any], | ||||
| ) -> Doc: ... | ||||
| def validate_examples( | ||||
|     examples: Iterable[Example], | ||||
|     method: str, | ||||
| ) -> None: ... | ||||
| def validate_get_examples( | ||||
|     get_examples: Callable[[], Iterable[Example]], | ||||
|     method: str, | ||||
| ): ... | ||||
| 
 | ||||
| class Example: | ||||
|     x: Doc | ||||
|     y: Doc | ||||
| 
 | ||||
|     def __init__( | ||||
|         self, | ||||
|         predicted: Doc, | ||||
|         reference: Doc, | ||||
|         *, | ||||
|         alignment: Optional[Alignment] = None, | ||||
|     ): ... | ||||
|     def __len__(self) -> int: ... | ||||
|     @property | ||||
|     def predicted(self) -> Doc: ... | ||||
|     @predicted.setter | ||||
|     def predicted(self, doc: Doc) -> None: ... | ||||
|     @property | ||||
|     def reference(self) -> Doc: ... | ||||
|     @reference.setter | ||||
|     def reference(self, doc: Doc) -> None: ... | ||||
|     def copy(self) -> Example: ... | ||||
|     @classmethod | ||||
|     def from_dict(cls, predicted: Doc, example_dict: Dict[str, Any]) -> Example: ... | ||||
|     @property | ||||
|     def alignment(self) -> Alignment: ... | ||||
|     def get_aligned(self, field: str, as_string=False): ... | ||||
|     def get_aligned_parse(self, projectivize=True): ... | ||||
|     def get_aligned_sent_starts(self): ... | ||||
|     def get_aligned_spans_x2y(self, x_spans: Sequence[Span], allow_overlap=False) -> List[Span]: ... | ||||
|     def get_aligned_spans_y2x(self, y_spans: Sequence[Span], allow_overlap=False) -> List[Span]: ... | ||||
|     def get_aligned_ents_and_ner(self) -> Tuple[List[Span], List[str]]: ... | ||||
|     def get_aligned_ner(self) -> List[str]: ... | ||||
|     def get_matching_ents(self, check_label: bool = True) -> List[Span]: ... | ||||
|     def to_dict(self) -> Dict[str, Any]: ... | ||||
|     def split_sents(self) -> List[Example]: ... | ||||
|     @property | ||||
|     def text(self) -> str: ... | ||||
|     def __str__(self) -> str: ... | ||||
|     def __repr__(self) -> str: ... | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user