# Type stub for the `Doc` container and the `DocMethod` extension protocol.
# Every body is `...`: this file only declares signatures and attribute types.
from typing import Callable, Protocol, Iterable, Iterator, Optional
from typing import Union, Tuple, List, Dict, Any, overload
from cymem.cymem import Pool
from thinc.types import ArrayXd, Floats1d, Floats2d, Ints2d, Ragged
from .span import Span
from .token import Token
from .span_groups import SpanGroups
from .retokenizer import Retokenizer
from ..lexeme import Lexeme
from ..vocab import Vocab
from .underscore import Underscore
from pathlib import Path
import numpy as np


class DocMethod(Protocol):
    # Structural type of the callable accepted as `method=` by
    # `Doc.set_extension`: its first argument is a `Doc` (annotated here as
    # `self`), followed by arbitrary positional and keyword arguments.
    def __call__(self: Doc, *args: Any, **kwargs: Any) -> Any: ...  # type: ignore[misc]


class Doc:
    # ---- Instance attributes -------------------------------------------
    vocab: Vocab
    mem: Pool
    spans: SpanGroups
    max_length: int
    length: int
    activations: Dict[str, Dict[str, Union[ArrayXd, Ragged]]]
    cats: Dict[str, float]
    user_hooks: Dict[str, Callable[..., Any]]
    user_token_hooks: Dict[str, Callable[..., Any]]
    user_span_hooks: Dict[str, Callable[..., Any]]
    # `np.float64` rather than `np.float_`: the latter was an alias of
    # `np.float64` on NumPy 1.x and no longer exists on NumPy 2.x.
    tensor: np.ndarray[Any, np.dtype[np.float64]]
    user_data: Dict[str, Any]
    has_unknown_spaces: bool
    _context: Any

    # ---- Custom extension registry (class-level) -----------------------
    @classmethod
    def set_extension(
        cls,
        name: str,
        default: Optional[Any] = ...,
        getter: Optional[Callable[[Doc], Any]] = ...,
        setter: Optional[Callable[[Doc, Any], None]] = ...,
        method: Optional[DocMethod] = ...,
        force: bool = ...,
    ) -> None: ...

    # The 4-tuple's element types line up with the `default`, `method`,
    # `getter` and `setter` parameters of `set_extension`, in that order.
    @classmethod
    def get_extension(
        cls, name: str
    ) -> Tuple[
        Optional[Any],
        Optional[DocMethod],
        Optional[Callable[[Doc], Any]],
        Optional[Callable[[Doc, Any], None]],
    ]: ...

    @classmethod
    def has_extension(cls, name: str) -> bool: ...

    # Same 4-tuple shape as `get_extension`.
    @classmethod
    def remove_extension(
        cls, name: str
    ) -> Tuple[
        Optional[Any],
        Optional[DocMethod],
        Optional[Callable[[Doc], Any]],
        Optional[Callable[[Doc, Any], None]],
    ]: ...

    # ---- Construction --------------------------------------------------
    # Only `vocab` is required; every other argument has a default.
    def __init__(
        self,
        vocab: Vocab,
        words: Optional[List[str]] = ...,
        spaces: Optional[List[bool]] = ...,
        user_data: Optional[Dict[Any, Any]] = ...,
        tags: Optional[List[str]] = ...,
        pos: Optional[List[str]] = ...,
        morphs: Optional[List[str]] = ...,
        lemmas: Optional[List[str]] = ...,
        heads: Optional[List[int]] = ...,
        deps: Optional[List[str]] = ...,
        sent_starts: Optional[List[Union[bool, int, None]]] = ...,
        ents: Optional[List[str]] = ...,
    ) -> None: ...

    # Accessor for the `Underscore` object (custom extension attributes).
    @property
    def _(self) -> Underscore: ...

    # ---- Annotation status ---------------------------------------------
    @property
    def is_tagged(self) -> bool: ...
    @property
    def is_parsed(self) -> bool: ...
    @property
    def is_nered(self) -> bool: ...
    @property
    def is_sentenced(self) -> bool: ...
    def has_annotation(
        self, attr: Union[int, str], *, require_complete: bool = ...
    ) -> bool: ...

    # ---- Container protocol --------------------------------------------
    # Integer indexing yields a `Token`; slicing yields a `Span`.
    @overload
    def __getitem__(self, i: int) -> Token: ...
    @overload
    def __getitem__(self, i: slice) -> Span: ...
    def __iter__(self) -> Iterator[Token]: ...
    def __len__(self) -> int: ...
    def __unicode__(self) -> str: ...
    def __bytes__(self) -> bytes: ...
    def __str__(self) -> str: ...
    def __repr__(self) -> str: ...
    @property
    def doc(self) -> Doc: ...

    # ---- Spans, similarity and vectors ---------------------------------
    def char_span(
        self,
        start_idx: int,
        end_idx: int,
        label: Union[int, str] = ...,
        kb_id: Union[int, str] = ...,
        vector: Optional[Floats1d] = ...,
        alignment_mode: str = ...,
    ) -> Span: ...
    def similarity(self, other: Union[Doc, Span, Token, Lexeme]) -> float: ...
    @property
    def has_vector(self) -> bool: ...
    vector: Floats1d
    vector_norm: float

    # ---- Text and linguistic views -------------------------------------
    @property
    def text(self) -> str: ...
    @property
    def text_with_ws(self) -> str: ...
    # Variable-length tuple: `Tuple[Span]` would mean "exactly one Span".
    ents: Tuple[Span, ...]
    def set_ents(
        self,
        entities: List[Span],
        *,
        blocked: Optional[List[Span]] = ...,
        missing: Optional[List[Span]] = ...,
        outside: Optional[List[Span]] = ...,
        default: str = ...
    ) -> None: ...
    @property
    def noun_chunks(self) -> Iterator[Span]: ...
    @property
    def sents(self) -> Iterator[Span]: ...
    @property
    def lang(self) -> int: ...
    @property
    def lang_(self) -> str: ...

    # ---- Array conversion ----------------------------------------------
    def count_by(
        self, attr_id: int, exclude: Optional[Any] = ..., counts: Optional[Any] = ...
    ) -> Dict[Any, int]: ...
    def from_array(
        self, attrs: Union[int, str, List[Union[int, str]]], array: Ints2d
    ) -> Doc: ...
    def to_array(
        self, py_attr_ids: Union[int, str, List[Union[int, str]]]
    ) -> np.ndarray[Any, np.dtype[np.float64]]: ...
    @staticmethod
    def from_docs(
        docs: List[Doc],
        ensure_whitespace: bool = ...,
        attrs: Optional[
            Union[Tuple[Union[str, int], ...], List[Union[int, str]]]
        ] = ...,
    ) -> Doc: ...
    def get_lca_matrix(self) -> Ints2d: ...
    def copy(self) -> Doc: ...

    # ---- Serialization -------------------------------------------------
    # `exclude` tuples are typed as variable-length (`Tuple[str, ...]`) so a
    # tuple of any number of names is accepted, not only a 1-tuple.
    def to_disk(
        self, path: Union[str, Path], *, exclude: Iterable[str] = ...
    ) -> None: ...
    def from_disk(
        self,
        path: Union[str, Path],
        *,
        exclude: Union[List[str], Tuple[str, ...]] = ...
    ) -> Doc: ...
    def to_bytes(
        self, *, exclude: Union[List[str], Tuple[str, ...]] = ...
    ) -> bytes: ...
    def from_bytes(
        self, bytes_data: bytes, *, exclude: Union[List[str], Tuple[str, ...]] = ...
    ) -> Doc: ...
    # NOTE(review): `to_dict` is declared as returning `bytes` and `from_dict`
    # as taking `bytes`, identical to `to_bytes`/`from_bytes`. Given the names,
    # a dict type may have been intended — confirm against the implementation.
    def to_dict(
        self, *, exclude: Union[List[str], Tuple[str, ...]] = ...
    ) -> bytes: ...
    def from_dict(
        self, msg: bytes, *, exclude: Union[List[str], Tuple[str, ...]] = ...
    ) -> Doc: ...

    # ---- Miscellaneous -------------------------------------------------
    def extend_tensor(self, tensor: Floats2d) -> None: ...
    def retokenize(self) -> Retokenizer: ...
    def to_json(self, underscore: Optional[List[str]] = ...) -> Dict[str, Any]: ...
    def from_json(
        self, doc_json: Dict[str, Any] = ..., validate: bool = False
    ) -> Doc: ...
    def to_utf8_array(self, nr_char: int = ...) -> Ints2d: ...
    @staticmethod
    def _get_array_attrs() -> Tuple[Any, ...]: ...