mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	feat: add example stubs (#12679)
* feat: add example stubs * fix: add required annotations * fix: mypy issues * fix: use Py36-compatible Protocol * Minor reformatting --------- Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com> Co-authored-by: svlandeg <svlandeg@github.com>
This commit is contained in:
		
							parent
							
								
									6fc153a266
								
							
						
					
					
						commit
						30bb34533a
					
				| 
						 | 
					@ -8,6 +8,7 @@ from typing import (
 | 
				
			||||||
    List,
 | 
					    List,
 | 
				
			||||||
    Optional,
 | 
					    Optional,
 | 
				
			||||||
    Protocol,
 | 
					    Protocol,
 | 
				
			||||||
 | 
					    Sequence,
 | 
				
			||||||
    Tuple,
 | 
					    Tuple,
 | 
				
			||||||
    Union,
 | 
					    Union,
 | 
				
			||||||
    overload,
 | 
					    overload,
 | 
				
			||||||
| 
						 | 
					@ -134,7 +135,12 @@ class Doc:
 | 
				
			||||||
    def text(self) -> str: ...
 | 
					    def text(self) -> str: ...
 | 
				
			||||||
    @property
 | 
					    @property
 | 
				
			||||||
    def text_with_ws(self) -> str: ...
 | 
					    def text_with_ws(self) -> str: ...
 | 
				
			||||||
    ents: Tuple[Span]
 | 
					    # Ideally the getter would output Tuple[Span]
 | 
				
			||||||
 | 
					    # see https://github.com/python/mypy/issues/3004
 | 
				
			||||||
 | 
					    @property
 | 
				
			||||||
 | 
					    def ents(self) -> Sequence[Span]: ...
 | 
				
			||||||
 | 
					    @ents.setter
 | 
				
			||||||
 | 
					    def ents(self, value: Sequence[Span]) -> None: ...
 | 
				
			||||||
    def set_ents(
 | 
					    def set_ents(
 | 
				
			||||||
        self,
 | 
					        self,
 | 
				
			||||||
        entities: List[Span],
 | 
					        entities: List[Span],
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -6,6 +6,7 @@ from typing import TYPE_CHECKING, Callable, Iterable, Iterator, List, Optional,
 | 
				
			||||||
import srsly
 | 
					import srsly
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from .. import util
 | 
					from .. import util
 | 
				
			||||||
 | 
					from ..compat import Protocol
 | 
				
			||||||
from ..errors import Errors, Warnings
 | 
					from ..errors import Errors, Warnings
 | 
				
			||||||
from ..tokens import Doc, DocBin
 | 
					from ..tokens import Doc, DocBin
 | 
				
			||||||
from ..vocab import Vocab
 | 
					from ..vocab import Vocab
 | 
				
			||||||
| 
						 | 
					@ -19,6 +20,11 @@ if TYPE_CHECKING:
 | 
				
			||||||
FILE_TYPE = ".spacy"
 | 
					FILE_TYPE = ".spacy"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class ReaderProtocol(Protocol):
 | 
				
			||||||
 | 
					    def __call__(self, nlp: "Language") -> Iterable[Example]:
 | 
				
			||||||
 | 
					        pass
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@util.registry.readers("spacy.Corpus.v1")
 | 
					@util.registry.readers("spacy.Corpus.v1")
 | 
				
			||||||
def create_docbin_reader(
 | 
					def create_docbin_reader(
 | 
				
			||||||
    path: Optional[Path],
 | 
					    path: Optional[Path],
 | 
				
			||||||
| 
						 | 
					@ -26,7 +32,7 @@ def create_docbin_reader(
 | 
				
			||||||
    max_length: int = 0,
 | 
					    max_length: int = 0,
 | 
				
			||||||
    limit: int = 0,
 | 
					    limit: int = 0,
 | 
				
			||||||
    augmenter: Optional[Callable] = None,
 | 
					    augmenter: Optional[Callable] = None,
 | 
				
			||||||
) -> Callable[["Language"], Iterable[Example]]:
 | 
					) -> ReaderProtocol:
 | 
				
			||||||
    if path is None:
 | 
					    if path is None:
 | 
				
			||||||
        raise ValueError(Errors.E913)
 | 
					        raise ValueError(Errors.E913)
 | 
				
			||||||
    util.logger.debug("Loading corpus from path: %s", path)
 | 
					    util.logger.debug("Loading corpus from path: %s", path)
 | 
				
			||||||
| 
						 | 
					@ -45,7 +51,7 @@ def create_jsonl_reader(
 | 
				
			||||||
    min_length: int = 0,
 | 
					    min_length: int = 0,
 | 
				
			||||||
    max_length: int = 0,
 | 
					    max_length: int = 0,
 | 
				
			||||||
    limit: int = 0,
 | 
					    limit: int = 0,
 | 
				
			||||||
) -> Callable[["Language"], Iterable[Example]]:
 | 
					) -> ReaderProtocol:
 | 
				
			||||||
    return JsonlCorpus(path, min_length=min_length, max_length=max_length, limit=limit)
 | 
					    return JsonlCorpus(path, min_length=min_length, max_length=max_length, limit=limit)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -63,7 +69,7 @@ def create_plain_text_reader(
 | 
				
			||||||
    path: Optional[Path],
 | 
					    path: Optional[Path],
 | 
				
			||||||
    min_length: int = 0,
 | 
					    min_length: int = 0,
 | 
				
			||||||
    max_length: int = 0,
 | 
					    max_length: int = 0,
 | 
				
			||||||
) -> Callable[["Language"], Iterable[Doc]]:
 | 
					) -> ReaderProtocol:
 | 
				
			||||||
    """Iterate Example objects from a file or directory of plain text
 | 
					    """Iterate Example objects from a file or directory of plain text
 | 
				
			||||||
    UTF-8 files with one line per doc.
 | 
					    UTF-8 files with one line per doc.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -144,7 +150,7 @@ class Corpus:
 | 
				
			||||||
        self.augmenter = augmenter if augmenter is not None else dont_augment
 | 
					        self.augmenter = augmenter if augmenter is not None else dont_augment
 | 
				
			||||||
        self.shuffle = shuffle
 | 
					        self.shuffle = shuffle
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def __call__(self, nlp: "Language") -> Iterator[Example]:
 | 
					    def __call__(self, nlp: "Language") -> Iterable[Example]:
 | 
				
			||||||
        """Yield examples from the data.
 | 
					        """Yield examples from the data.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        nlp (Language): The current nlp object.
 | 
					        nlp (Language): The current nlp object.
 | 
				
			||||||
| 
						 | 
					@ -182,7 +188,7 @@ class Corpus:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def make_examples(
 | 
					    def make_examples(
 | 
				
			||||||
        self, nlp: "Language", reference_docs: Iterable[Doc]
 | 
					        self, nlp: "Language", reference_docs: Iterable[Doc]
 | 
				
			||||||
    ) -> Iterator[Example]:
 | 
					    ) -> Iterable[Example]:
 | 
				
			||||||
        for reference in reference_docs:
 | 
					        for reference in reference_docs:
 | 
				
			||||||
            if len(reference) == 0:
 | 
					            if len(reference) == 0:
 | 
				
			||||||
                continue
 | 
					                continue
 | 
				
			||||||
| 
						 | 
					@ -197,7 +203,7 @@ class Corpus:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def make_examples_gold_preproc(
 | 
					    def make_examples_gold_preproc(
 | 
				
			||||||
        self, nlp: "Language", reference_docs: Iterable[Doc]
 | 
					        self, nlp: "Language", reference_docs: Iterable[Doc]
 | 
				
			||||||
    ) -> Iterator[Example]:
 | 
					    ) -> Iterable[Example]:
 | 
				
			||||||
        for reference in reference_docs:
 | 
					        for reference in reference_docs:
 | 
				
			||||||
            if reference.has_annotation("SENT_START"):
 | 
					            if reference.has_annotation("SENT_START"):
 | 
				
			||||||
                ref_sents = [sent.as_doc() for sent in reference.sents]
 | 
					                ref_sents = [sent.as_doc() for sent in reference.sents]
 | 
				
			||||||
| 
						 | 
					@ -210,7 +216,7 @@ class Corpus:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def read_docbin(
 | 
					    def read_docbin(
 | 
				
			||||||
        self, vocab: Vocab, locs: Iterable[Union[str, Path]]
 | 
					        self, vocab: Vocab, locs: Iterable[Union[str, Path]]
 | 
				
			||||||
    ) -> Iterator[Doc]:
 | 
					    ) -> Iterable[Doc]:
 | 
				
			||||||
        """Yield training examples as example dicts"""
 | 
					        """Yield training examples as example dicts"""
 | 
				
			||||||
        i = 0
 | 
					        i = 0
 | 
				
			||||||
        for loc in locs:
 | 
					        for loc in locs:
 | 
				
			||||||
| 
						 | 
					@ -257,7 +263,7 @@ class JsonlCorpus:
 | 
				
			||||||
        self.max_length = max_length
 | 
					        self.max_length = max_length
 | 
				
			||||||
        self.limit = limit
 | 
					        self.limit = limit
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def __call__(self, nlp: "Language") -> Iterator[Example]:
 | 
					    def __call__(self, nlp: "Language") -> Iterable[Example]:
 | 
				
			||||||
        """Yield examples from the data.
 | 
					        """Yield examples from the data.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        nlp (Language): The current nlp object.
 | 
					        nlp (Language): The current nlp object.
 | 
				
			||||||
| 
						 | 
					@ -307,7 +313,7 @@ class PlainTextCorpus:
 | 
				
			||||||
        self.min_length = min_length
 | 
					        self.min_length = min_length
 | 
				
			||||||
        self.max_length = max_length
 | 
					        self.max_length = max_length
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def __call__(self, nlp: "Language") -> Iterator[Example]:
 | 
					    def __call__(self, nlp: "Language") -> Iterable[Example]:
 | 
				
			||||||
        """Yield examples from the data.
 | 
					        """Yield examples from the data.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        nlp (Language): The current nlp object.
 | 
					        nlp (Language): The current nlp object.
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										59
									
								
								spacy/training/example.pyi
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										59
									
								
								spacy/training/example.pyi
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,59 @@
 | 
				
			||||||
 | 
					from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Tuple
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from ..tokens import Doc, Span
 | 
				
			||||||
 | 
					from ..vocab import Vocab
 | 
				
			||||||
 | 
					from .alignment import Alignment
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def annotations_to_doc(
 | 
				
			||||||
 | 
					    vocab: Vocab,
 | 
				
			||||||
 | 
					    tok_annot: Dict[str, Any],
 | 
				
			||||||
 | 
					    doc_annot: Dict[str, Any],
 | 
				
			||||||
 | 
					) -> Doc: ...
 | 
				
			||||||
 | 
					def validate_examples(
 | 
				
			||||||
 | 
					    examples: Iterable[Example],
 | 
				
			||||||
 | 
					    method: str,
 | 
				
			||||||
 | 
					) -> None: ...
 | 
				
			||||||
 | 
					def validate_get_examples(
 | 
				
			||||||
 | 
					    get_examples: Callable[[], Iterable[Example]],
 | 
				
			||||||
 | 
					    method: str,
 | 
				
			||||||
 | 
					): ...
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class Example:
 | 
				
			||||||
 | 
					    x: Doc
 | 
				
			||||||
 | 
					    y: Doc
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def __init__(
 | 
				
			||||||
 | 
					        self,
 | 
				
			||||||
 | 
					        predicted: Doc,
 | 
				
			||||||
 | 
					        reference: Doc,
 | 
				
			||||||
 | 
					        *,
 | 
				
			||||||
 | 
					        alignment: Optional[Alignment] = None,
 | 
				
			||||||
 | 
					    ): ...
 | 
				
			||||||
 | 
					    def __len__(self) -> int: ...
 | 
				
			||||||
 | 
					    @property
 | 
				
			||||||
 | 
					    def predicted(self) -> Doc: ...
 | 
				
			||||||
 | 
					    @predicted.setter
 | 
				
			||||||
 | 
					    def predicted(self, doc: Doc) -> None: ...
 | 
				
			||||||
 | 
					    @property
 | 
				
			||||||
 | 
					    def reference(self) -> Doc: ...
 | 
				
			||||||
 | 
					    @reference.setter
 | 
				
			||||||
 | 
					    def reference(self, doc: Doc) -> None: ...
 | 
				
			||||||
 | 
					    def copy(self) -> Example: ...
 | 
				
			||||||
 | 
					    @classmethod
 | 
				
			||||||
 | 
					    def from_dict(cls, predicted: Doc, example_dict: Dict[str, Any]) -> Example: ...
 | 
				
			||||||
 | 
					    @property
 | 
				
			||||||
 | 
					    def alignment(self) -> Alignment: ...
 | 
				
			||||||
 | 
					    def get_aligned(self, field: str, as_string=False): ...
 | 
				
			||||||
 | 
					    def get_aligned_parse(self, projectivize=True): ...
 | 
				
			||||||
 | 
					    def get_aligned_sent_starts(self): ...
 | 
				
			||||||
 | 
					    def get_aligned_spans_x2y(self, x_spans: Sequence[Span], allow_overlap=False) -> List[Span]: ...
 | 
				
			||||||
 | 
					    def get_aligned_spans_y2x(self, y_spans: Sequence[Span], allow_overlap=False) -> List[Span]: ...
 | 
				
			||||||
 | 
					    def get_aligned_ents_and_ner(self) -> Tuple[List[Span], List[str]]: ...
 | 
				
			||||||
 | 
					    def get_aligned_ner(self) -> List[str]: ...
 | 
				
			||||||
 | 
					    def get_matching_ents(self, check_label: bool = True) -> List[Span]: ...
 | 
				
			||||||
 | 
					    def to_dict(self) -> Dict[str, Any]: ...
 | 
				
			||||||
 | 
					    def split_sents(self) -> List[Example]: ...
 | 
				
			||||||
 | 
					    @property
 | 
				
			||||||
 | 
					    def text(self) -> str: ...
 | 
				
			||||||
 | 
					    def __str__(self) -> str: ...
 | 
				
			||||||
 | 
					    def __repr__(self) -> str: ...
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user