mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 09:56:28 +03:00
Clarify error when words are of wrong type (#9541)
* Clarify error when words are of wrong type (see #9437)
* Update docs
* Use try/except
* Apply suggestions from code review

Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>
Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com>
Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>
This commit is contained in:
parent
2fd8d616e7
commit
006df1ae1f
|
@ -877,6 +877,7 @@ class Errors:
|
||||||
"filename. Specify an epoch to resume from.")
|
"filename. Specify an epoch to resume from.")
|
||||||
E1021 = ("`pos` value \"{pp}\" is not a valid Universal Dependencies tag. "
|
E1021 = ("`pos` value \"{pp}\" is not a valid Universal Dependencies tag. "
|
||||||
"Non-UD tags should use the `tag` property.")
|
"Non-UD tags should use the `tag` property.")
|
||||||
|
E1022 = ("Words must be of type str or int, but input is of type '{wtype}'")
|
||||||
|
|
||||||
|
|
||||||
# Deprecated model shortcuts, only used in errors and warnings
|
# Deprecated model shortcuts, only used in errors and warnings
|
||||||
|
|
|
@ -194,11 +194,12 @@ cdef class Doc:
|
||||||
|
|
||||||
vocab (Vocab): A vocabulary object, which must match any models you
|
vocab (Vocab): A vocabulary object, which must match any models you
|
||||||
want to use (e.g. tokenizer, parser, entity recognizer).
|
want to use (e.g. tokenizer, parser, entity recognizer).
|
||||||
words (Optional[List[str]]): A list of unicode strings to add to the document
|
words (Optional[List[Union[str, int]]]): A list of unicode strings or
|
||||||
as words. If `None`, defaults to empty list.
|
hash values to add to the document as words. If `None`, defaults to
|
||||||
spaces (Optional[List[bool]]): A list of boolean values, of the same length as
|
empty list.
|
||||||
words. True means that the word is followed by a space, False means
|
spaces (Optional[List[bool]]): A list of boolean values, of the same
|
||||||
it is not. If `None`, defaults to `[True]*len(words)`
|
length as `words`. `True` means that the word is followed by a space,
|
||||||
|
`False` means it is not. If `None`, defaults to `[True]*len(words)`
|
||||||
user_data (dict or None): Optional extra data to attach to the Doc.
|
user_data (dict or None): Optional extra data to attach to the Doc.
|
||||||
tags (Optional[List[str]]): A list of unicode strings, of the same
|
tags (Optional[List[str]]): A list of unicode strings, of the same
|
||||||
length as words, to assign as token.tag. Defaults to None.
|
length as words, to assign as token.tag. Defaults to None.
|
||||||
|
@ -266,7 +267,10 @@ cdef class Doc:
|
||||||
elif isinstance(word, bytes):
|
elif isinstance(word, bytes):
|
||||||
raise ValueError(Errors.E028.format(value=word))
|
raise ValueError(Errors.E028.format(value=word))
|
||||||
else:
|
else:
|
||||||
|
try:
|
||||||
lexeme = self.vocab.get_by_orth(self.mem, word)
|
lexeme = self.vocab.get_by_orth(self.mem, word)
|
||||||
|
except TypeError:
|
||||||
|
raise TypeError(Errors.E1022.format(wtype=type(word)))
|
||||||
self.push_back(lexeme, has_space)
|
self.push_back(lexeme, has_space)
|
||||||
|
|
||||||
if heads is not None:
|
if heads is not None:
|
||||||
|
|
|
@ -34,7 +34,7 @@ Construct a `Doc` object. The most common way to get a `Doc` object is via the
|
||||||
| Name | Description |
|
| Name | Description |
|
||||||
| ---------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
| ---------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||||
| `vocab` | A storage container for lexical types. ~~Vocab~~ |
|
| `vocab` | A storage container for lexical types. ~~Vocab~~ |
|
||||||
| `words` | A list of strings to add to the container. ~~Optional[List[str]]~~ |
|
| `words` | A list of strings or integer hash values to add to the document as words. ~~Optional[List[Union[str, int]]]~~ |
|
||||||
| `spaces` | A list of boolean values indicating whether each word has a subsequent space. Must have the same length as `words`, if specified. Defaults to a sequence of `True`. ~~Optional[List[bool]]~~ |
|
| `spaces` | A list of boolean values indicating whether each word has a subsequent space. Must have the same length as `words`, if specified. Defaults to a sequence of `True`. ~~Optional[List[bool]]~~ |
|
||||||
| _keyword-only_ | |
|
| _keyword-only_ | |
|
||||||
| `user\_data` | Optional extra data to attach to the Doc. ~~Dict~~ |
|
| `user\_data` | Optional extra data to attach to the Doc. ~~Dict~~ |
|
||||||
|
|
Loading…
Reference in New Issue
Block a user