Type documentation fixes for Doc (#13187)

* correct char_span output type - can be None

* unify type of exclude parameter

* black

* further fixes to from_dict and to_dict

* formatting
This commit is contained in:
Sofie Van Landeghem 2023-12-18 09:00:47 +01:00 committed by GitHub
parent 7df328fbfe
commit 56fc3bc0f3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 14 additions and 21 deletions

View File

@@ -125,7 +125,7 @@ class Doc:
vector: Optional[Floats1d] = ..., vector: Optional[Floats1d] = ...,
alignment_mode: str = ..., alignment_mode: str = ...,
span_id: Union[int, str] = ..., span_id: Union[int, str] = ...,
) -> Span: ... ) -> Optional[Span]: ...
def similarity(self, other: Union[Doc, Span, Token, Lexeme]) -> float: ... def similarity(self, other: Union[Doc, Span, Token, Lexeme]) -> float: ...
@property @property
def has_vector(self) -> bool: ... def has_vector(self) -> bool: ...
@@ -179,15 +179,13 @@ class Doc:
self, path: Union[str, Path], *, exclude: Iterable[str] = ... self, path: Union[str, Path], *, exclude: Iterable[str] = ...
) -> None: ... ) -> None: ...
def from_disk( def from_disk(
self, path: Union[str, Path], *, exclude: Union[List[str], Tuple[str]] = ... self, path: Union[str, Path], *, exclude: Iterable[str] = ...
) -> Doc: ... ) -> Doc: ...
def to_bytes(self, *, exclude: Union[List[str], Tuple[str]] = ...) -> bytes: ... def to_bytes(self, *, exclude: Iterable[str] = ...) -> bytes: ...
def from_bytes( def from_bytes(self, bytes_data: bytes, *, exclude: Iterable[str] = ...) -> Doc: ...
self, bytes_data: bytes, *, exclude: Union[List[str], Tuple[str]] = ... def to_dict(self, *, exclude: Iterable[str] = ...) -> Dict[str, Any]: ...
) -> Doc: ...
def to_dict(self, *, exclude: Union[List[str], Tuple[str]] = ...) -> bytes: ...
def from_dict( def from_dict(
self, msg: bytes, *, exclude: Union[List[str], Tuple[str]] = ... self, msg: Dict[str, Any], *, exclude: Iterable[str] = ...
) -> Doc: ... ) -> Doc: ...
def extend_tensor(self, tensor: Floats2d) -> None: ... def extend_tensor(self, tensor: Floats2d) -> None: ...
def retokenize(self) -> Retokenizer: ... def retokenize(self) -> Retokenizer: ...

View File

@@ -1326,7 +1326,7 @@ cdef class Doc:
path (str / Path): A path to a directory. Paths may be either path (str / Path): A path to a directory. Paths may be either
strings or `Path`-like objects. strings or `Path`-like objects.
exclude (list): String names of serialization fields to exclude. exclude (Iterable[str]): String names of serialization fields to exclude.
RETURNS (Doc): The modified `Doc` object. RETURNS (Doc): The modified `Doc` object.
DOCS: https://spacy.io/api/doc#from_disk DOCS: https://spacy.io/api/doc#from_disk
@@ -1339,7 +1339,7 @@ cdef class Doc:
def to_bytes(self, *, exclude=tuple()): def to_bytes(self, *, exclude=tuple()):
"""Serialize, i.e. export the document contents to a binary string. """Serialize, i.e. export the document contents to a binary string.
exclude (list): String names of serialization fields to exclude. exclude (Iterable[str]): String names of serialization fields to exclude.
RETURNS (bytes): A losslessly serialized copy of the `Doc`, including RETURNS (bytes): A losslessly serialized copy of the `Doc`, including
all annotations. all annotations.
@@ -1351,7 +1351,7 @@ cdef class Doc:
"""Deserialize, i.e. import the document contents from a binary string. """Deserialize, i.e. import the document contents from a binary string.
data (bytes): The string to load from. data (bytes): The string to load from.
exclude (list): String names of serialization fields to exclude. exclude (Iterable[str]): String names of serialization fields to exclude.
RETURNS (Doc): Itself. RETURNS (Doc): Itself.
DOCS: https://spacy.io/api/doc#from_bytes DOCS: https://spacy.io/api/doc#from_bytes
@@ -1361,11 +1361,8 @@ cdef class Doc:
def to_dict(self, *, exclude=tuple()): def to_dict(self, *, exclude=tuple()):
"""Export the document contents to a dictionary for serialization. """Export the document contents to a dictionary for serialization.
exclude (list): String names of serialization fields to exclude. exclude (Iterable[str]): String names of serialization fields to exclude.
RETURNS (bytes): A losslessly serialized copy of the `Doc`, including RETURNS (Dict[str, Any]): A dictionary representation of the `Doc`
all annotations.
DOCS: https://spacy.io/api/doc#to_bytes
""" """
array_head = Doc._get_array_attrs() array_head = Doc._get_array_attrs()
strings = set() strings = set()
@@ -1411,13 +1408,11 @@ cdef class Doc:
return util.to_dict(serializers, exclude) return util.to_dict(serializers, exclude)
def from_dict(self, msg, *, exclude=tuple()): def from_dict(self, msg, *, exclude=tuple()):
"""Deserialize, i.e. import the document contents from a binary string. """Deserialize the document contents from a dictionary representation.
data (bytes): The string to load from. msg (Dict[str, Any]): The dictionary to load from.
exclude (list): String names of serialization fields to exclude. exclude (Iterable[str]): String names of serialization fields to exclude.
RETURNS (Doc): Itself. RETURNS (Doc): Itself.
DOCS: https://spacy.io/api/doc#from_dict
""" """
if self.length != 0: if self.length != 0:
raise ValueError(Errors.E033.format(length=self.length)) raise ValueError(Errors.E033.format(length=self.length))