mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-30 23:47:31 +03:00 
			
		
		
		
	* Improve typing hints for Matcher.__call__ * Add typing hints for DependencyMatcher * Add typing hints to underscore extensions * Update Doc.tensor type (requires numpy 1.21) * Fix typing hints for Language.component decorator * Use generic np.ndarray type in Doc to avoid numpy version update * Fix mypy errors * Fix cyclic import caused by Underscore typing hints * Use Literal type from spacy.compat * Update matcher.pyi import format Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com> Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
		
			
				
	
	
		
			176 lines
		
	
	
		
			5.6 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			176 lines
		
	
	
		
			5.6 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| from typing import Callable, Protocol, Iterable, Iterator, Optional
 | |
| from typing import Union, Tuple, List, Dict, Any, overload
 | |
| from cymem.cymem import Pool
 | |
| from thinc.types import Floats1d, Floats2d, Ints2d
 | |
| from .span import Span
 | |
| from .token import Token
 | |
| from ._dict_proxies import SpanGroups
 | |
| from ._retokenize import Retokenizer
 | |
| from ..lexeme import Lexeme
 | |
| from ..vocab import Vocab
 | |
| from .underscore import Underscore
 | |
| from pathlib import Path
 | |
| import numpy as np
 | |
| 
 | |
class DocMethod(Protocol):
    """Callable protocol used to type the ``method`` slot of Doc extensions.

    ``self`` is deliberately annotated as ``Doc`` rather than as the protocol
    itself, which is why the signature needs a ``type: ignore[misc]``.
    """

    def __call__(self: Doc, *args: Any, **kwargs: Any) -> Any: ...  # type: ignore[misc]
 | |
| 
 | |
class Doc:
    """Typing stub for ``Doc``.

    Only attribute types and call signatures are declared here; every body is
    ``...``. Variable-length tuples are spelled ``Tuple[X, ...]`` because a
    bare ``Tuple[X]`` means "a tuple of exactly one element".
    """

    # Plain attributes.
    vocab: Vocab
    mem: Pool
    spans: SpanGroups
    max_length: int
    length: int
    sentiment: float
    cats: Dict[str, float]
    user_hooks: Dict[str, Callable[..., Any]]
    user_token_hooks: Dict[str, Callable[..., Any]]
    user_span_hooks: Dict[str, Callable[..., Any]]
    # ``np.float_`` was removed in NumPy 2.0; ``np.float64`` is the type it
    # aliased and exists in every NumPy release.
    tensor: np.ndarray[Any, np.dtype[np.float64]]
    user_data: Dict[str, Any]
    has_unknown_spaces: bool
    _context: Any

    # Extension registry (class-level API).
    @classmethod
    def set_extension(
        cls,
        name: str,
        default: Optional[Any] = ...,
        getter: Optional[Callable[[Doc], Any]] = ...,
        setter: Optional[Callable[[Doc, Any], None]] = ...,
        method: Optional[DocMethod] = ...,
        force: bool = ...,
    ) -> None: ...

    # The 4-tuple is typed, in order, as: default, method, getter, setter.
    @classmethod
    def get_extension(
        cls, name: str
    ) -> Tuple[
        Optional[Any],
        Optional[DocMethod],
        Optional[Callable[[Doc], Any]],
        Optional[Callable[[Doc, Any], None]],
    ]: ...

    @classmethod
    def has_extension(cls, name: str) -> bool: ...

    # Same 4-tuple shape as ``get_extension``.
    @classmethod
    def remove_extension(
        cls, name: str
    ) -> Tuple[
        Optional[Any],
        Optional[DocMethod],
        Optional[Callable[[Doc], Any]],
        Optional[Callable[[Doc, Any], None]],
    ]: ...

    def __init__(
        self,
        vocab: Vocab,
        words: Optional[List[str]] = ...,
        spaces: Optional[List[bool]] = ...,
        user_data: Optional[Dict[Any, Any]] = ...,
        tags: Optional[List[str]] = ...,
        pos: Optional[List[str]] = ...,
        morphs: Optional[List[str]] = ...,
        lemmas: Optional[List[str]] = ...,
        heads: Optional[List[int]] = ...,
        deps: Optional[List[str]] = ...,
        sent_starts: Optional[List[Optional[bool]]] = ...,
        ents: Optional[List[str]] = ...,
    ) -> None: ...

    # Read-only properties.
    @property
    def _(self) -> Underscore: ...
    @property
    def is_tagged(self) -> bool: ...
    @property
    def is_parsed(self) -> bool: ...
    @property
    def is_nered(self) -> bool: ...
    @property
    def is_sentenced(self) -> bool: ...

    def has_annotation(
        self, attr: Union[int, str], *, require_complete: bool = ...
    ) -> bool: ...

    # Integer index -> Token, slice -> Span.
    @overload
    def __getitem__(self, i: int) -> Token: ...
    @overload
    def __getitem__(self, i: slice) -> Span: ...

    def __iter__(self) -> Iterator[Token]: ...
    def __len__(self) -> int: ...
    def __unicode__(self) -> str: ...
    def __bytes__(self) -> bytes: ...
    def __str__(self) -> str: ...
    def __repr__(self) -> str: ...

    @property
    def doc(self) -> Doc: ...

    # Per the spaCy API reference the result may be None, so callers must
    # handle that case; the previous ``-> Span`` hid it from type checkers.
    def char_span(
        self,
        start_idx: int,
        end_idx: int,
        label: Union[int, str] = ...,
        kb_id: Union[int, str] = ...,
        vector: Optional[Floats1d] = ...,
        alignment_mode: str = ...,
    ) -> Optional[Span]: ...

    def similarity(self, other: Union[Doc, Span, Token, Lexeme]) -> float: ...

    @property
    def has_vector(self) -> bool: ...

    vector: Floats1d
    vector_norm: float

    @property
    def text(self) -> str: ...
    @property
    def text_with_ws(self) -> str: ...

    # Any number of spans, not a 1-tuple.
    ents: Tuple[Span, ...]

    def set_ents(
        self,
        entities: List[Span],
        *,
        blocked: Optional[List[Span]] = ...,
        missing: Optional[List[Span]] = ...,
        outside: Optional[List[Span]] = ...,
        default: str = ...,
    ) -> None: ...

    @property
    def noun_chunks(self) -> Iterator[Span]: ...
    @property
    def sents(self) -> Iterator[Span]: ...
    @property
    def lang(self) -> int: ...
    @property
    def lang_(self) -> str: ...

    def count_by(
        self, attr_id: int, exclude: Optional[Any] = ..., counts: Optional[Any] = ...
    ) -> Dict[Any, int]: ...

    def from_array(
        self, attrs: Union[int, str, List[Union[int, str]]], array: Ints2d
    ) -> Doc: ...

    # NOTE(review): the element dtype is kept as declared (float64); the spaCy
    # API reference describes uint64 values -- confirm before narrowing.
    def to_array(
        self, py_attr_ids: Union[int, str, List[Union[int, str]]]
    ) -> np.ndarray[Any, np.dtype[np.float64]]: ...

    @staticmethod
    def from_docs(
        docs: List[Doc],
        ensure_whitespace: bool = ...,
        attrs: Optional[
            Union[Tuple[Union[str, int], ...], List[Union[int, str]]]
        ] = ...,
    ) -> Doc: ...

    def get_lca_matrix(self) -> Ints2d: ...
    def copy(self) -> Doc: ...

    # Serialization. ``exclude`` is ``Iterable[str]`` throughout so that all
    # six methods agree with ``to_disk``; lists and tuples of any length are
    # still accepted.
    def to_disk(
        self, path: Union[str, Path], *, exclude: Iterable[str] = ...
    ) -> None: ...
    def from_disk(
        self, path: Union[str, Path], *, exclude: Iterable[str] = ...
    ) -> Doc: ...
    def to_bytes(self, *, exclude: Iterable[str] = ...) -> bytes: ...
    def from_bytes(
        self, bytes_data: bytes, *, exclude: Iterable[str] = ...
    ) -> Doc: ...

    # The dict form is a mapping, not a byte string.
    def to_dict(self, *, exclude: Iterable[str] = ...) -> Dict[str, Any]: ...
    def from_dict(
        self, msg: Dict[str, Any], *, exclude: Iterable[str] = ...
    ) -> Doc: ...

    def extend_tensor(self, tensor: Floats2d) -> None: ...
    def retokenize(self) -> Retokenizer: ...
    def to_json(self, underscore: Optional[List[str]] = ...) -> Dict[str, Any]: ...
    def to_utf8_array(self, nr_char: int = ...) -> Ints2d: ...

    @staticmethod
    def _get_array_attrs() -> Tuple[Any, ...]: ...
 |