mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-23 07:44:12 +03:00
0737443096
* feat: add example stubs * fix: add required annotations * fix: mypy issues * fix: use Py36-compatible Protocol * Minor reformatting * adding further type specifications and removing internal methods * black formatting * widen type to iterable * add private methods that are being used by the built-in converters * revert changes to corpus.py * fixes * fixes * fix typing of PlainTextCorpus --------- Co-authored-by: Basile Dura <basile@bdura.me> Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>
201 lines
6.1 KiB
Python
201 lines
6.1 KiB
Python
from pathlib import Path
|
|
from typing import (
|
|
Any,
|
|
Callable,
|
|
Dict,
|
|
Iterable,
|
|
Iterator,
|
|
List,
|
|
Optional,
|
|
Protocol,
|
|
Sequence,
|
|
Tuple,
|
|
Union,
|
|
overload,
|
|
)
|
|
|
|
import numpy as np
|
|
from cymem.cymem import Pool
|
|
from thinc.types import Floats1d, Floats2d, Ints2d
|
|
|
|
from ..lexeme import Lexeme
|
|
from ..vocab import Vocab
|
|
from ._dict_proxies import SpanGroups
|
|
from ._retokenize import Retokenizer
|
|
from .span import Span
|
|
from .token import Token
|
|
from .underscore import Underscore
|
|
|
|
# Annotation-only declaration: gives the module-level name a type (a tuple of
# strings of any length) without assigning a value here.
DOCBIN_ALL_ATTRS: Tuple[str, ...]
|
|
|
|
class DocMethod(Protocol):
    """Structural type for callables used as the ``method`` extension of ``Doc``.

    The callable is invoked with a ``Doc`` in the ``self`` position, followed
    by arbitrary positional and keyword arguments, and may return anything.
    """

    # Annotating ``self`` with a foreign class is rejected by mypy, hence the
    # targeted ignore; it must stay on the ``def`` line to take effect.
    def __call__(self: Doc, *args: Any, **kwargs: Any) -> Any: ...  # type: ignore[misc]
|
|
|
|
class Doc:
    """Typed interface of ``Doc``.

    Only attribute annotations and method signatures are declared here; every
    body is ``...``, so nothing in this class carries runtime behaviour.
    Defaults written as ``...`` mean "has a default" without stating its value.
    """

    # --- Instance attributes -------------------------------------------------
    vocab: Vocab
    mem: Pool
    spans: SpanGroups
    max_length: int
    length: int
    sentiment: float
    cats: Dict[str, float]
    user_hooks: Dict[str, Callable[..., Any]]
    user_token_hooks: Dict[str, Callable[..., Any]]
    user_span_hooks: Dict[str, Callable[..., Any]]
    # ``np.float64`` is the type the former ``np.float_`` alias referred to;
    # the alias itself no longer exists in NumPy 2.0.
    tensor: np.ndarray[Any, np.dtype[np.float64]]
    user_data: Dict[str, Any]
    has_unknown_spaces: bool
    _context: Any

    # --- Extension registry (class-level) ------------------------------------
    @classmethod
    def set_extension(
        cls,
        name: str,
        default: Optional[Any] = ...,
        getter: Optional[Callable[[Doc], Any]] = ...,
        setter: Optional[Callable[[Doc, Any], None]] = ...,
        method: Optional[DocMethod] = ...,
        force: bool = ...,
    ) -> None: ...
    @classmethod
    def get_extension(
        cls, name: str
    ) -> Tuple[
        Optional[Any],
        Optional[DocMethod],
        Optional[Callable[[Doc], Any]],
        Optional[Callable[[Doc, Any], None]],
    ]: ...
    @classmethod
    def has_extension(cls, name: str) -> bool: ...
    @classmethod
    def remove_extension(
        cls, name: str
    ) -> Tuple[
        Optional[Any],
        Optional[DocMethod],
        Optional[Callable[[Doc], Any]],
        Optional[Callable[[Doc, Any], None]],
    ]: ...

    # --- Construction and basic protocol -------------------------------------
    def __init__(
        self,
        vocab: Vocab,
        words: Optional[List[str]] = ...,
        spaces: Optional[List[bool]] = ...,
        user_data: Optional[Dict[Any, Any]] = ...,
        tags: Optional[List[str]] = ...,
        pos: Optional[List[str]] = ...,
        morphs: Optional[List[str]] = ...,
        lemmas: Optional[List[str]] = ...,
        heads: Optional[List[int]] = ...,
        deps: Optional[List[str]] = ...,
        sent_starts: Optional[List[Union[bool, int, None]]] = ...,
        ents: Optional[List[str]] = ...,
    ) -> None: ...
    @property
    def _(self) -> Underscore: ...
    @property
    def is_tagged(self) -> bool: ...
    @property
    def is_parsed(self) -> bool: ...
    @property
    def is_nered(self) -> bool: ...
    @property
    def is_sentenced(self) -> bool: ...
    def has_annotation(
        self, attr: Union[int, str], *, require_complete: bool = ...
    ) -> bool: ...
    # Indexing with an int yields a Token, with a slice a Span.
    @overload
    def __getitem__(self, i: int) -> Token: ...
    @overload
    def __getitem__(self, i: slice) -> Span: ...
    def __iter__(self) -> Iterator[Token]: ...
    def __len__(self) -> int: ...
    def __unicode__(self) -> str: ...
    def __bytes__(self) -> bytes: ...
    def __str__(self) -> str: ...
    def __repr__(self) -> str: ...
    @property
    def doc(self) -> Doc: ...
    def char_span(
        self,
        start_idx: int,
        end_idx: int,
        label: Union[int, str] = ...,
        kb_id: Union[int, str] = ...,
        vector: Optional[Floats1d] = ...,
        alignment_mode: str = ...,
        span_id: Union[int, str] = ...,
    ) -> Span: ...
    def similarity(self, other: Union[Doc, Span, Token, Lexeme]) -> float: ...
    @property
    def has_vector(self) -> bool: ...
    vector: Floats1d
    vector_norm: float
    @property
    def text(self) -> str: ...
    @property
    def text_with_ws(self) -> str: ...
    # Ideally the getter would output Tuple[Span]
    # see https://github.com/python/mypy/issues/3004
    @property
    def ents(self) -> Sequence[Span]: ...
    @ents.setter
    def ents(self, value: Sequence[Span]) -> None: ...
    def set_ents(
        self,
        entities: List[Span],
        *,
        blocked: Optional[List[Span]] = ...,
        missing: Optional[List[Span]] = ...,
        outside: Optional[List[Span]] = ...,
        default: str = ...,
    ) -> None: ...
    @property
    def noun_chunks(self) -> Iterator[Span]: ...
    @property
    def sents(self) -> Iterator[Span]: ...
    @property
    def lang(self) -> int: ...
    @property
    def lang_(self) -> str: ...
    def count_by(
        self, attr_id: int, exclude: Optional[Any] = ..., counts: Optional[Any] = ...
    ) -> Dict[Any, int]: ...

    # --- Array conversion ----------------------------------------------------
    def from_array(
        self, attrs: Union[int, str, List[Union[int, str]]], array: Ints2d
    ) -> Doc: ...
    # NOTE(review): the element type below mirrors ``tensor``; confirm against
    # the implementation whether an integer dtype would be more accurate.
    def to_array(
        self, py_attr_ids: Union[int, str, List[Union[int, str]]]
    ) -> np.ndarray[Any, np.dtype[np.float64]]: ...
    @staticmethod
    def from_docs(
        docs: List[Doc],
        ensure_whitespace: bool = ...,
        # ``Tuple[X, ...]`` accepts tuples of any length; ``Tuple[X]`` would
        # only accept a tuple with exactly one element.
        attrs: Optional[
            Union[Tuple[Union[str, int], ...], List[Union[int, str]]]
        ] = ...,
    ) -> Doc: ...
    def get_lca_matrix(self) -> Ints2d: ...
    def copy(self) -> Doc: ...

    # --- Serialization -------------------------------------------------------
    # ``exclude`` tuples are variadic (``Tuple[str, ...]``) so that callers may
    # pass any number of field names, not exactly one.
    def to_disk(
        self, path: Union[str, Path], *, exclude: Iterable[str] = ...
    ) -> None: ...
    def from_disk(
        self,
        path: Union[str, Path],
        *,
        exclude: Union[List[str], Tuple[str, ...]] = ...,
    ) -> Doc: ...
    def to_bytes(
        self, *, exclude: Union[List[str], Tuple[str, ...]] = ...
    ) -> bytes: ...
    def from_bytes(
        self, bytes_data: bytes, *, exclude: Union[List[str], Tuple[str, ...]] = ...
    ) -> Doc: ...
    # The dict-based pair exchanges a mapping, not ``bytes``; the bytes-based
    # pair above is the one that deals in binary data.
    def to_dict(
        self, *, exclude: Union[List[str], Tuple[str, ...]] = ...
    ) -> Dict[str, Any]: ...
    def from_dict(
        self, msg: Dict[str, Any], *, exclude: Union[List[str], Tuple[str, ...]] = ...
    ) -> Doc: ...
    def extend_tensor(self, tensor: Floats2d) -> None: ...
    def retokenize(self) -> Retokenizer: ...
    def to_json(self, underscore: Optional[List[str]] = ...) -> Dict[str, Any]: ...
    def from_json(
        self, doc_json: Dict[str, Any] = ..., validate: bool = False
    ) -> Doc: ...
    def to_utf8_array(self, nr_char: int = ...) -> Ints2d: ...
    @staticmethod
    def _get_array_attrs() -> Tuple[Any, ...]: ...
|