From 07374430964848148bc018ff2f36f3dc3cf3b315 Mon Sep 17 00:00:00 2001 From: Sofie Van Landeghem Date: Wed, 2 Aug 2023 08:15:12 +0200 Subject: [PATCH] feat: add example stubs (3) (#12801) * feat: add example stubs * fix: add required annotations * fix: mypy issues * fix: use Py36-compatible Portocol * Minor reformatting * adding further type specifications and removing internal methods * black formatting * widen type to iterable * add private methods that are being used by the built-in convertors * revert changes to corpus.py * fixes * fixes * fix typing of PlainTextCorpus --------- Co-authored-by: Basile Dura Co-authored-by: Adriane Boyd --- spacy/tokens/doc.pyi | 8 ++++- spacy/training/corpus.py | 2 +- spacy/training/example.pyi | 66 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 74 insertions(+), 2 deletions(-) create mode 100644 spacy/training/example.pyi diff --git a/spacy/tokens/doc.pyi b/spacy/tokens/doc.pyi index 00c7a9d07..55222f8aa 100644 --- a/spacy/tokens/doc.pyi +++ b/spacy/tokens/doc.pyi @@ -8,6 +8,7 @@ from typing import ( List, Optional, Protocol, + Sequence, Tuple, Union, overload, @@ -134,7 +135,12 @@ class Doc: def text(self) -> str: ... @property def text_with_ws(self) -> str: ... - ents: Tuple[Span] + # Ideally the getter would output Tuple[Span] + # see https://github.com/python/mypy/issues/3004 + @property + def ents(self) -> Sequence[Span]: ... + @ents.setter + def ents(self, value: Sequence[Span]) -> None: ... def set_ents( self, entities: List[Span], diff --git a/spacy/training/corpus.py b/spacy/training/corpus.py index 6037c15e3..5cc2733a5 100644 --- a/spacy/training/corpus.py +++ b/spacy/training/corpus.py @@ -63,7 +63,7 @@ def create_plain_text_reader( path: Optional[Path], min_length: int = 0, max_length: int = 0, -) -> Callable[["Language"], Iterable[Doc]]: +) -> Callable[["Language"], Iterable[Example]]: """Iterate Example objects from a file or directory of plain text UTF-8 files with one line per doc. diff --git a/spacy/training/example.pyi b/spacy/training/example.pyi new file mode 100644 index 000000000..06639d70c --- /dev/null +++ b/spacy/training/example.pyi @@ -0,0 +1,66 @@ +from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple + +from ..tokens import Doc, Span +from ..vocab import Vocab +from .alignment import Alignment + +def annotations_to_doc( + vocab: Vocab, + tok_annot: Dict[str, Any], + doc_annot: Dict[str, Any], +) -> Doc: ... +def validate_examples( + examples: Iterable[Example], + method: str, +) -> None: ... +def validate_get_examples( + get_examples: Callable[[], Iterable[Example]], + method: str, +): ... + +class Example: + x: Doc + y: Doc + + def __init__( + self, + predicted: Doc, + reference: Doc, + *, + alignment: Optional[Alignment] = None, + ): ... + def __len__(self) -> int: ... + @property + def predicted(self) -> Doc: ... + @predicted.setter + def predicted(self, doc: Doc) -> None: ... + @property + def reference(self) -> Doc: ... + @reference.setter + def reference(self, doc: Doc) -> None: ... + def copy(self) -> Example: ... + @classmethod + def from_dict(cls, predicted: Doc, example_dict: Dict[str, Any]) -> Example: ... + @property + def alignment(self) -> Alignment: ... + def get_aligned(self, field: str, as_string=False): ... + def get_aligned_parse(self, projectivize=True): ... + def get_aligned_sent_starts(self): ... + def get_aligned_spans_x2y( + self, x_spans: Iterable[Span], allow_overlap=False + ) -> List[Span]: ... + def get_aligned_spans_y2x( + self, y_spans: Iterable[Span], allow_overlap=False + ) -> List[Span]: ... + def get_aligned_ents_and_ner(self) -> Tuple[List[Span], List[str]]: ... + def get_aligned_ner(self) -> List[str]: ... + def get_matching_ents(self, check_label: bool = True) -> List[Span]: ... + def to_dict(self) -> Dict[str, Any]: ... + def split_sents(self) -> List[Example]: ... + @property + def text(self) -> str: ... + def __str__(self) -> str: ... + def __repr__(self) -> str: ... + +def _parse_example_dict_data(example_dict): ... +def _fix_legacy_dict_data(example_dict): ...