From 56fc3bc0f3403d32f6dbd27f8dc19c9687bcbcc8 Mon Sep 17 00:00:00 2001 From: Sofie Van Landeghem Date: Mon, 18 Dec 2023 09:00:47 +0100 Subject: [PATCH] Type documentation fixes for Doc (#13187) * correct char_span output type - can be None * unify type of exclude parameter * black * further fixes to from_dict and to_dict * formatting --- spacy/tokens/doc.pyi | 14 ++++++-------- spacy/tokens/doc.pyx | 21 ++++++++------------- 2 files changed, 14 insertions(+), 21 deletions(-) diff --git a/spacy/tokens/doc.pyi b/spacy/tokens/doc.pyi index 365859d89..f0b68862c 100644 --- a/spacy/tokens/doc.pyi +++ b/spacy/tokens/doc.pyi @@ -125,7 +125,7 @@ class Doc: vector: Optional[Floats1d] = ..., alignment_mode: str = ..., span_id: Union[int, str] = ..., - ) -> Span: ... + ) -> Optional[Span]: ... def similarity(self, other: Union[Doc, Span, Token, Lexeme]) -> float: ... @property def has_vector(self) -> bool: ... @@ -179,15 +179,13 @@ class Doc: self, path: Union[str, Path], *, exclude: Iterable[str] = ... ) -> None: ... def from_disk( - self, path: Union[str, Path], *, exclude: Union[List[str], Tuple[str]] = ... + self, path: Union[str, Path], *, exclude: Iterable[str] = ... ) -> Doc: ... - def to_bytes(self, *, exclude: Union[List[str], Tuple[str]] = ...) -> bytes: ... - def from_bytes( - self, bytes_data: bytes, *, exclude: Union[List[str], Tuple[str]] = ... - ) -> Doc: ... - def to_dict(self, *, exclude: Union[List[str], Tuple[str]] = ...) -> bytes: ... + def to_bytes(self, *, exclude: Iterable[str] = ...) -> bytes: ... + def from_bytes(self, bytes_data: bytes, *, exclude: Iterable[str] = ...) -> Doc: ... + def to_dict(self, *, exclude: Iterable[str] = ...) -> Dict[str, Any]: ... def from_dict( - self, msg: bytes, *, exclude: Union[List[str], Tuple[str]] = ... + self, msg: Dict[str, Any], *, exclude: Iterable[str] = ... ) -> Doc: ... def extend_tensor(self, tensor: Floats2d) -> None: ... def retokenize(self) -> Retokenizer: ... diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx index 745eb5ff3..181c0ce0f 100644 --- a/spacy/tokens/doc.pyx +++ b/spacy/tokens/doc.pyx @@ -1326,7 +1326,7 @@ cdef class Doc: path (str / Path): A path to a directory. Paths may be either strings or `Path`-like objects. - exclude (list): String names of serialization fields to exclude. + exclude (Iterable[str]): String names of serialization fields to exclude. RETURNS (Doc): The modified `Doc` object. DOCS: https://spacy.io/api/doc#from_disk @@ -1339,7 +1339,7 @@ cdef class Doc: def to_bytes(self, *, exclude=tuple()): """Serialize, i.e. export the document contents to a binary string. - exclude (list): String names of serialization fields to exclude. + exclude (Iterable[str]): String names of serialization fields to exclude. RETURNS (bytes): A losslessly serialized copy of the `Doc`, including all annotations. @@ -1351,7 +1351,7 @@ cdef class Doc: """Deserialize, i.e. import the document contents from a binary string. data (bytes): The string to load from. - exclude (list): String names of serialization fields to exclude. + exclude (Iterable[str]): String names of serialization fields to exclude. RETURNS (Doc): Itself. DOCS: https://spacy.io/api/doc#from_bytes @@ -1361,11 +1361,8 @@ cdef class Doc: def to_dict(self, *, exclude=tuple()): """Export the document contents to a dictionary for serialization. - exclude (list): String names of serialization fields to exclude. - RETURNS (bytes): A losslessly serialized copy of the `Doc`, including - all annotations. - - DOCS: https://spacy.io/api/doc#to_bytes + exclude (Iterable[str]): String names of serialization fields to exclude. + RETURNS (Dict[str, Any]): A dictionary representation of the `Doc` """ array_head = Doc._get_array_attrs() strings = set() @@ -1411,13 +1408,11 @@ cdef class Doc: return util.to_dict(serializers, exclude) def from_dict(self, msg, *, exclude=tuple()): - """Deserialize, i.e. import the document contents from a binary string. + """Deserialize the document contents from a dictionary representation. - data (bytes): The string to load from. - exclude (list): String names of serialization fields to exclude. + msg (Dict[str, Any]): The dictionary to load from. + exclude (Iterable[str]): String names of serialization fields to exclude. RETURNS (Doc): Itself. - - DOCS: https://spacy.io/api/doc#from_dict """ if self.length != 0: raise ValueError(Errors.E033.format(length=self.length))