Revert "feat: add example stubs (#12679)"

This reverts commit 30bb34533a.
This commit is contained in:
svlandeg 2023-07-06 17:02:38 +02:00
parent 30bb34533a
commit d26e4e0849
3 changed files with 10 additions and 81 deletions

View File

@ -8,7 +8,6 @@ from typing import (
List, List,
Optional, Optional,
Protocol, Protocol,
Sequence,
Tuple, Tuple,
Union, Union,
overload, overload,
@ -135,12 +134,7 @@ class Doc:
def text(self) -> str: ... def text(self) -> str: ...
@property @property
def text_with_ws(self) -> str: ... def text_with_ws(self) -> str: ...
# Ideally the getter would output Tuple[Span] ents: Tuple[Span]
# see https://github.com/python/mypy/issues/3004
@property
def ents(self) -> Sequence[Span]: ...
@ents.setter
def ents(self, value: Sequence[Span]) -> None: ...
def set_ents( def set_ents(
self, self,
entities: List[Span], entities: List[Span],

View File

@ -6,7 +6,6 @@ from typing import TYPE_CHECKING, Callable, Iterable, Iterator, List, Optional,
import srsly import srsly
from .. import util from .. import util
from ..compat import Protocol
from ..errors import Errors, Warnings from ..errors import Errors, Warnings
from ..tokens import Doc, DocBin from ..tokens import Doc, DocBin
from ..vocab import Vocab from ..vocab import Vocab
@ -20,11 +19,6 @@ if TYPE_CHECKING:
FILE_TYPE = ".spacy" FILE_TYPE = ".spacy"
class ReaderProtocol(Protocol):
def __call__(self, nlp: "Language") -> Iterable[Example]:
pass
@util.registry.readers("spacy.Corpus.v1") @util.registry.readers("spacy.Corpus.v1")
def create_docbin_reader( def create_docbin_reader(
path: Optional[Path], path: Optional[Path],
@ -32,7 +26,7 @@ def create_docbin_reader(
max_length: int = 0, max_length: int = 0,
limit: int = 0, limit: int = 0,
augmenter: Optional[Callable] = None, augmenter: Optional[Callable] = None,
) -> ReaderProtocol: ) -> Callable[["Language"], Iterable[Example]]:
if path is None: if path is None:
raise ValueError(Errors.E913) raise ValueError(Errors.E913)
util.logger.debug("Loading corpus from path: %s", path) util.logger.debug("Loading corpus from path: %s", path)
@ -51,7 +45,7 @@ def create_jsonl_reader(
min_length: int = 0, min_length: int = 0,
max_length: int = 0, max_length: int = 0,
limit: int = 0, limit: int = 0,
) -> ReaderProtocol: ) -> Callable[["Language"], Iterable[Example]]:
return JsonlCorpus(path, min_length=min_length, max_length=max_length, limit=limit) return JsonlCorpus(path, min_length=min_length, max_length=max_length, limit=limit)
@ -69,7 +63,7 @@ def create_plain_text_reader(
path: Optional[Path], path: Optional[Path],
min_length: int = 0, min_length: int = 0,
max_length: int = 0, max_length: int = 0,
) -> ReaderProtocol: ) -> Callable[["Language"], Iterable[Doc]]:
"""Iterate Example objects from a file or directory of plain text """Iterate Example objects from a file or directory of plain text
UTF-8 files with one line per doc. UTF-8 files with one line per doc.
@ -150,7 +144,7 @@ class Corpus:
self.augmenter = augmenter if augmenter is not None else dont_augment self.augmenter = augmenter if augmenter is not None else dont_augment
self.shuffle = shuffle self.shuffle = shuffle
def __call__(self, nlp: "Language") -> Iterable[Example]: def __call__(self, nlp: "Language") -> Iterator[Example]:
"""Yield examples from the data. """Yield examples from the data.
nlp (Language): The current nlp object. nlp (Language): The current nlp object.
@ -188,7 +182,7 @@ class Corpus:
def make_examples( def make_examples(
self, nlp: "Language", reference_docs: Iterable[Doc] self, nlp: "Language", reference_docs: Iterable[Doc]
) -> Iterable[Example]: ) -> Iterator[Example]:
for reference in reference_docs: for reference in reference_docs:
if len(reference) == 0: if len(reference) == 0:
continue continue
@ -203,7 +197,7 @@ class Corpus:
def make_examples_gold_preproc( def make_examples_gold_preproc(
self, nlp: "Language", reference_docs: Iterable[Doc] self, nlp: "Language", reference_docs: Iterable[Doc]
) -> Iterable[Example]: ) -> Iterator[Example]:
for reference in reference_docs: for reference in reference_docs:
if reference.has_annotation("SENT_START"): if reference.has_annotation("SENT_START"):
ref_sents = [sent.as_doc() for sent in reference.sents] ref_sents = [sent.as_doc() for sent in reference.sents]
@ -216,7 +210,7 @@ class Corpus:
def read_docbin( def read_docbin(
self, vocab: Vocab, locs: Iterable[Union[str, Path]] self, vocab: Vocab, locs: Iterable[Union[str, Path]]
) -> Iterable[Doc]: ) -> Iterator[Doc]:
"""Yield training examples as example dicts""" """Yield training examples as example dicts"""
i = 0 i = 0
for loc in locs: for loc in locs:
@ -263,7 +257,7 @@ class JsonlCorpus:
self.max_length = max_length self.max_length = max_length
self.limit = limit self.limit = limit
def __call__(self, nlp: "Language") -> Iterable[Example]: def __call__(self, nlp: "Language") -> Iterator[Example]:
"""Yield examples from the data. """Yield examples from the data.
nlp (Language): The current nlp object. nlp (Language): The current nlp object.
@ -313,7 +307,7 @@ class PlainTextCorpus:
self.min_length = min_length self.min_length = min_length
self.max_length = max_length self.max_length = max_length
def __call__(self, nlp: "Language") -> Iterable[Example]: def __call__(self, nlp: "Language") -> Iterator[Example]:
"""Yield examples from the data. """Yield examples from the data.
nlp (Language): The current nlp object. nlp (Language): The current nlp object.

View File

@ -1,59 +0,0 @@
from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Tuple
from ..tokens import Doc, Span
from ..vocab import Vocab
from .alignment import Alignment
# Stub signature: takes a Vocab and two string-keyed dicts and returns a Doc.
# NOTE(review): presumably `tok_annot` holds per-token annotations and
# `doc_annot` holds document-level ones; the expected keys are not visible in
# this stub, so confirm against the implementation.
def annotations_to_doc(
vocab: Vocab,
tok_annot: Dict[str, Any],
doc_annot: Dict[str, Any],
) -> Doc: ...
# Stub signature: accepts an iterable of Example objects plus a `method`
# string and returns nothing.
# NOTE(review): presumably `method` names the caller for error reporting and
# invalid input raises; neither is visible in this stub, so confirm.
def validate_examples(
examples: Iterable[Example],
method: str,
) -> None: ...
# Stub signature: `get_examples` is a zero-argument callable returning an
# iterable of Example objects; `method` is a string.
# NOTE(review): no return annotation is declared here, so type checkers treat
# the result as Any. The sibling validate_examples declares `-> None`; confirm
# against the implementation and annotate to match.
def validate_get_examples(
get_examples: Callable[[], Iterable[Example]],
method: str,
): ...
# Type stub for Example: an object constructed from a `predicted` Doc and a
# `reference` Doc, with an optional keyword-only Alignment.
class Example:
    # NOTE(review): presumably `x` mirrors `predicted` and `y` mirrors
    # `reference`; this stub does not show the relationship, so confirm.
    x: Doc
    y: Doc

    def __init__(
        self,
        predicted: Doc,
        reference: Doc,
        *,
        alignment: Optional[Alignment] = None,
    ) -> None: ...
    def __len__(self) -> int: ...

    # `predicted` and `reference` are read/write properties typed as Doc.
    @property
    def predicted(self) -> Doc: ...
    @predicted.setter
    def predicted(self, doc: Doc) -> None: ...
    @property
    def reference(self) -> Doc: ...
    @reference.setter
    def reference(self, doc: Doc) -> None: ...
    def copy(self) -> Example: ...
    @classmethod
    def from_dict(cls, predicted: Doc, example_dict: Dict[str, Any]) -> Example: ...

    # `alignment` is declared read-only: no setter appears in this stub.
    @property
    def alignment(self) -> Alignment: ...

    # The flag parameters below are annotated as bool because their defaults
    # are bool literals.
    # NOTE(review): the return types of the next three methods are not
    # declared, so type checkers treat them as Any; fill them in from the
    # implementation.
    def get_aligned(self, field: str, as_string: bool = False): ...
    def get_aligned_parse(self, projectivize: bool = True): ...
    def get_aligned_sent_starts(self): ...
    def get_aligned_spans_x2y(
        self, x_spans: Sequence[Span], allow_overlap: bool = False
    ) -> List[Span]: ...
    def get_aligned_spans_y2x(
        self, y_spans: Sequence[Span], allow_overlap: bool = False
    ) -> List[Span]: ...
    def get_aligned_ents_and_ner(self) -> Tuple[List[Span], List[str]]: ...
    def get_aligned_ner(self) -> List[str]: ...
    def get_matching_ents(self, check_label: bool = True) -> List[Span]: ...
    def to_dict(self) -> Dict[str, Any]: ...
    def split_sents(self) -> List[Example]: ...
    @property
    def text(self) -> str: ...
    def __str__(self) -> str: ...
    def __repr__(self) -> str: ...