spaCy/spacy/vocab.pyi

from contextlib import contextmanager
from pathlib import Path
from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Union

from cymem.cymem import Pool
from thinc.types import Floats1d, FloatsXd

from . import Language
from .lexeme import Lexeme
from .lookups import Lookups
from .morphology import Morphology
from .strings import StringStore
from .tokens import Doc, Span
from .vectors import Vectors

def create_vocab(
    lang: Optional[str], defaults: Any, vectors_name: Optional[str] = ...
) -> Vocab: ...

class Vocab:
    cfg: Dict[str, Any]
    get_noun_chunks: Optional[Callable[[Union[Doc, Span]], Iterator[Span]]]
    lookups: Lookups
    morphology: Morphology
    strings: StringStore
    vectors: Vectors
    writing_system: Dict[str, Any]
    def __init__(
        self,
        lex_attr_getters: Optional[Dict[str, Callable[[str], Any]]] = ...,
        strings: Optional[Union[List[str], StringStore]] = ...,
        lookups: Optional[Lookups] = ...,
        oov_prob: float = ...,
        vectors_name: Optional[str] = ...,
        writing_system: Dict[str, Any] = ...,
        get_noun_chunks: Optional[Callable[[Union[Doc, Span]], Iterator[Span]]] = ...,
    ) -> None: ...
    @property
    def lang(self) -> str: ...
    def __len__(self) -> int: ...
    def add_flag(
        self, flag_getter: Callable[[str], bool], flag_id: int = ...
    ) -> int: ...
    def __contains__(self, key: str) -> bool: ...
    def __iter__(self) -> Iterator[Lexeme]: ...
    def __getitem__(self, id_or_string: Union[str, int]) -> Lexeme: ...
    @property
    def vectors_length(self) -> int: ...
    def reset_vectors(
        self, *, width: Optional[int] = ..., shape: Optional[int] = ...
    ) -> None: ...
    def deduplicate_vectors(self) -> None: ...
    def prune_vectors(self, nr_row: int, batch_size: int = ...) -> Dict[str, float]: ...
    def get_vector(
        self,
        orth: Union[int, str],
        minn: Optional[int] = ...,
        maxn: Optional[int] = ...,
    ) -> FloatsXd: ...
    def set_vector(self, orth: Union[int, str], vector: Floats1d) -> None: ...
    def has_vector(self, orth: Union[int, str]) -> bool: ...
    def to_disk(
        self, path: Union[str, Path], *, exclude: Iterable[str] = ...
    ) -> None: ...
    def from_disk(
        self, path: Union[str, Path], *, exclude: Iterable[str] = ...
    ) -> Vocab: ...
    def to_bytes(self, *, exclude: Iterable[str] = ...) -> bytes: ...
    def from_bytes(
        self, bytes_data: bytes, *, exclude: Iterable[str] = ...
    ) -> Vocab: ...
    @contextmanager
    def memory_zone(self, mem: Optional[Pool] = None) -> Iterator[Pool]: ...

def pickle_vocab(vocab: Vocab) -> Any: ...
def unpickle_vocab(
    sstore: StringStore,
    vectors: Any,
    morphology: Any,
    _unused_object: Any,
    lex_attr_getters: Any,
    lookups: Any,
    get_noun_chunks: Any,
) -> Vocab: ...