mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 17:24:41 +03:00
Clarify error when words are of wrong type (#9541)
* Clarify error when words are of wrong type See #9437 * Update docs * Use try/except * Apply suggestions from code review Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com> Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com> Co-authored-by: Sofie Van Landeghem <svlandeg@users.noreply.github.com> Co-authored-by: Adriane Boyd <adrianeboyd@gmail.com>
This commit is contained in:
parent
2fd8d616e7
commit
006df1ae1f
|
@ -877,6 +877,7 @@ class Errors:
|
|||
"filename. Specify an epoch to resume from.")
|
||||
E1021 = ("`pos` value \"{pp}\" is not a valid Universal Dependencies tag. "
|
||||
"Non-UD tags should use the `tag` property.")
|
||||
E1022 = ("Words must be of type str or int, but input is of type '{wtype}'")
|
||||
|
||||
|
||||
# Deprecated model shortcuts, only used in errors and warnings
|
||||
|
|
|
@ -194,11 +194,12 @@ cdef class Doc:
|
|||
|
||||
vocab (Vocab): A vocabulary object, which must match any models you
|
||||
want to use (e.g. tokenizer, parser, entity recognizer).
|
||||
words (Optional[List[str]]): A list of unicode strings to add to the document
|
||||
as words. If `None`, defaults to empty list.
|
||||
spaces (Optional[List[bool]]): A list of boolean values, of the same length as
|
||||
words. True means that the word is followed by a space, False means
|
||||
it is not. If `None`, defaults to `[True]*len(words)`
|
||||
words (Optional[List[Union[str, int]]]): A list of unicode strings or
|
||||
hash values to add to the document as words. If `None`, defaults to
|
||||
empty list.
|
||||
spaces (Optional[List[bool]]): A list of boolean values, of the same
|
||||
length as `words`. `True` means that the word is followed by a space,
|
||||
`False` means it is not. If `None`, defaults to `[True]*len(words)`
|
||||
user_data (dict or None): Optional extra data to attach to the Doc.
|
||||
tags (Optional[List[str]]): A list of unicode strings, of the same
|
||||
length as words, to assign as token.tag. Defaults to None.
|
||||
|
@ -266,7 +267,10 @@ cdef class Doc:
|
|||
elif isinstance(word, bytes):
|
||||
raise ValueError(Errors.E028.format(value=word))
|
||||
else:
|
||||
lexeme = self.vocab.get_by_orth(self.mem, word)
|
||||
try:
|
||||
lexeme = self.vocab.get_by_orth(self.mem, word)
|
||||
except TypeError:
|
||||
raise TypeError(Errors.E1022.format(wtype=type(word)))
|
||||
self.push_back(lexeme, has_space)
|
||||
|
||||
if heads is not None:
|
||||
|
|
|
@ -34,7 +34,7 @@ Construct a `Doc` object. The most common way to get a `Doc` object is via the
|
|||
| Name | Description |
|
||||
| ---------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
|
||||
| `vocab` | A storage container for lexical types. ~~Vocab~~ |
|
||||
| `words` | A list of strings to add to the container. ~~Optional[List[str]]~~ |
|
||||
| `words` | A list of strings or integer hash values to add to the document as words. ~~Optional[List[Union[str,int]]]~~ |
|
||||
| `spaces` | A list of boolean values indicating whether each word has a subsequent space. Must have the same length as `words`, if specified. Defaults to a sequence of `True`. ~~Optional[List[bool]]~~ |
|
||||
| _keyword-only_ | |
|
||||
| `user\_data` | Optional extra data to attach to the Doc. ~~Dict~~ |
|
||||
|
|
Loading…
Reference in New Issue
Block a user