Use "raise ... from" in custom errors for better tracebacks

Ines Montani 2020-08-05 23:53:21 +02:00
parent 5cc0d89fad
commit 56c17973aa
22 changed files with 49 additions and 49 deletions
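The change is mechanical but worth spelling out. When you raise inside an "except" block, Python 3 implicitly chains the new exception to the one being handled and prints both tracebacks, joined by "During handling of the above exception, another exception occurred:". For the re-raises in this commit the original exception is noise, so "from None" suppresses it; "from err" is used in the one place where the original error is informative. A minimal sketch (illustrative only, not taken from the diff):

    def lookup(scores, key):
        try:
            return scores[key]
        except KeyError as e:
            # A bare raise here would print two tracebacks: the original
            # KeyError, then this one. "from None" hides the first, so the
            # user only sees the message written for them. "from e" would
            # instead link the two explicitly, printing "The above exception
            # was the direct cause of the following exception:" between them.
            raise KeyError(f"unknown key {e}; available: {list(scores)}") from None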

View File

@@ -211,7 +211,7 @@ def create_evaluation_callback(
except KeyError as e:
keys = list(scores.keys())
err = Errors.E983.format(dict="score_weights", key=str(e), keys=keys)
-raise KeyError(err)
+raise KeyError(err) from None
return weighted_score, scores
return evaluate
@@ -369,7 +369,7 @@ def setup_printer(
Errors.E983.format(
dict="scores (losses)", key=str(e), keys=list(info["losses"].keys())
)
-)
+) from None
try:
scores = [
@@ -382,7 +382,7 @@ def setup_printer(
key=str(e),
keys=list(info["other_scores"].keys()),
)
-)
+) from None
data = (
[info["epoch"], info["step"]]
+ losses

View File

@@ -139,7 +139,7 @@ cdef class Example:
def get_aligned_spans_y2x(self, y_spans):
return self._get_aligned_spans(self.x, y_spans, self.alignment.y2x)
def _get_aligned_spans(self, doc, spans, align):
seen = set()
output = []
@@ -207,7 +207,7 @@ cdef class Example:
sent_starts and return a list of the new Examples"""
if not self.reference.is_sentenced:
return [self]
align = self.alignment.y2x
seen_indices = set()
output = []
@@ -267,7 +267,7 @@ def _annot2array(vocab, tok_annot, doc_annot):
values.append([vocab.strings.add(v) for v in value])
except TypeError:
types = set([type(v) for v in value])
-raise TypeError(Errors.E969.format(field=key, types=types))
+raise TypeError(Errors.E969.format(field=key, types=types)) from None
array = numpy.asarray(values, dtype="uint64")
return attrs, array.T

View File

@@ -200,7 +200,7 @@ def try_sudachi_import(split_mode="A"):
"(https://github.com/WorksApplications/SudachiPy). "
"Install with `pip install sudachipy sudachidict_core` or "
"install spaCy with `pip install spacy[ja]`."
-)
+) from None
def resolve_pos(orth, tag, next_tag):
@@ -263,7 +263,7 @@ def get_dtokens_and_spaces(dtokens, text, gap_tag="空白"):
try:
word_start = text[text_pos:].index(word)
except ValueError:
-raise ValueError(Errors.E194.format(text=text, words=words))
+raise ValueError(Errors.E194.format(text=text, words=words)) from None
# space token
if word_start > 0:
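This file and the following language modules all share the same import-guard shape: an optional dependency is imported lazily, and when that fails, the bare ImportError ("No module named ...") is replaced by one carrying install instructions. A hedged sketch of the shape (the package and function names here are invented for illustration, not taken from the diff):

    def try_tokenizer_import():
        try:
            import some_tokenizer  # hypothetical optional dependency
            return some_tokenizer
        except ImportError:
            # The original ImportError adds nothing beyond the module name,
            # so "from None" drops it and only the install hint is printed.
            raise ImportError(
                "This language requires the some_tokenizer library. "
                "Install it with: pip install some-tokenizer"
            ) from None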

View File

@@ -85,7 +85,7 @@ def try_mecab_import() -> None:
"Korean support requires [mecab-ko](https://bitbucket.org/eunjeon/mecab-ko/src/master/README.md), "
"[mecab-ko-dic](https://bitbucket.org/eunjeon/mecab-ko-dic), "
"and [natto-py](https://github.com/buruzaemon/natto-py)"
-)
+) from None
def check_spaces(text, tokens):

View File

@@ -21,7 +21,7 @@ class RussianLemmatizer(Lemmatizer):
'try to fix it with "pip install pymorphy2==0.8" '
'or "pip install git+https://github.com/kmike/pymorphy2.git pymorphy2-dicts-uk"'
"if you need Ukrainian too"
-)
+) from None
if RussianLemmatizer._morph is None:
RussianLemmatizer._morph = MorphAnalyzer()

View File

@@ -31,7 +31,7 @@ class ThaiTokenizer(DummyTokenizer):
raise ImportError(
"The Thai tokenizer requires the PyThaiNLP library: "
"https://github.com/PyThaiNLP/pythainlp"
-)
+) from None
self.word_tokenize = word_tokenize
self.vocab = nlp.vocab

View File

@@ -23,7 +23,7 @@ class UkrainianLemmatizer(Lemmatizer):
"The Ukrainian lemmatizer requires the pymorphy2 library and "
'dictionaries: try to fix it with "pip uninstall pymorphy2" and'
'"pip install git+https://github.com/kmike/pymorphy2.git pymorphy2-dicts-uk"'
-)
+) from None
def __call__(
self, string: str, univ_pos: str, morphology: Optional[dict] = None

View File

@@ -38,7 +38,7 @@ class VietnameseTokenizer(DummyTokenizer):
"Pyvi not installed. Either set use_pyvi = False, "
"or install it https://pypi.python.org/pypi/pyvi"
)
-raise ImportError(msg)
+raise ImportError(msg) from None
def __call__(self, text: str) -> Doc:
if self.use_pyvi:

View File

@@ -129,7 +129,7 @@ class ChineseTokenizer(DummyTokenizer):
"pkuseg not installed: unable to reset pkuseg "
"user dict. Please " + _PKUSEG_INSTALL_MSG
)
-raise ImportError(msg)
+raise ImportError(msg) from None
for word in words:
self.pkuseg_seg.preprocesser.insert(word.strip(), "")
else:
@@ -208,7 +208,7 @@ class ChineseTokenizer(DummyTokenizer):
raise ImportError(
"pkuseg not installed. To use this model, "
+ _PKUSEG_INSTALL_MSG
-)
+) from None
self.pkuseg_seg = pkuseg.pkuseg(str(tempdir))
if pkuseg_data["processors_data"]:
processors_data = pkuseg_data["processors_data"]
@@ -258,7 +258,7 @@ class ChineseTokenizer(DummyTokenizer):
raise ImportError(
"pkuseg not installed. To use this model, "
+ _PKUSEG_INSTALL_MSG
-)
+) from None
if path.exists():
self.pkuseg_seg = pkuseg.pkuseg(path)
@@ -267,7 +267,7 @@ class ChineseTokenizer(DummyTokenizer):
import pkuseg
except ImportError:
if self.segmenter == Segmenter.pkuseg:
-raise ImportError(self._pkuseg_install_msg)
+raise ImportError(self._pkuseg_install_msg) from None
if self.segmenter == Segmenter.pkuseg:
data = srsly.read_msgpack(path)
(user_dict, do_process, common_words, other_words) = data
@@ -311,7 +311,7 @@ def try_jieba_import(segmenter: str) -> None:
"Jieba not installed. To use jieba, install it with `pip "
" install jieba` or from https://github.com/fxsjy/jieba"
)
-raise ImportError(msg)
+raise ImportError(msg) from None
def try_pkuseg_import(segmenter: str, pkuseg_model: str, pkuseg_user_dict: str) -> None:
@@ -332,11 +332,11 @@ def try_pkuseg_import(segmenter: str, pkuseg_model: str, pkuseg_user_dict: str)
except ImportError:
if segmenter == Segmenter.pkuseg:
msg = "pkuseg not installed. To use pkuseg, " + _PKUSEG_INSTALL_MSG
-raise ImportError(msg)
+raise ImportError(msg) from None
except FileNotFoundError:
if segmenter == Segmenter.pkuseg:
msg = "Unable to load pkuseg model from: " + pkuseg_model
-raise FileNotFoundError(msg)
+raise FileNotFoundError(msg) from None
def _get_pkuseg_trie_data(node, path=""):
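One detail specific to this file: in try_jieba_import and try_pkuseg_import the re-raise is conditional on the configured segmenter, so when the handler's condition is false the exception is deliberately swallowed and the function simply returns. A simplified sketch of that control flow (the Segmenter enum and install message are stubbed, and the pkuseg constructor call is abbreviated):

    from enum import Enum

    class Segmenter(str, Enum):  # stub for illustration
        char = "char"
        jieba = "jieba"
        pkuseg = "pkuseg"

    _PKUSEG_INSTALL_MSG = "install it with: pip install pkuseg"  # stub

    def try_pkuseg_import(segmenter, pkuseg_model, pkuseg_user_dict):
        try:
            import pkuseg
        except ImportError:
            if segmenter == Segmenter.pkuseg:
                msg = "pkuseg not installed. To use pkuseg, " + _PKUSEG_INSTALL_MSG
                raise ImportError(msg) from None
            return  # other segmenters don't need pkuseg: swallow the error
        try:
            return pkuseg.pkuseg(pkuseg_model, pkuseg_user_dict)
        except FileNotFoundError:
            if segmenter == Segmenter.pkuseg:
                msg = "Unable to load pkuseg model from: " + pkuseg_model
                raise FileNotFoundError(msg) from None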

View File

@@ -869,7 +869,7 @@ class Language:
try:
doc = proc(doc, **component_cfg.get(name, {}))
except KeyError:
-raise ValueError(Errors.E109.format(name=name))
+raise ValueError(Errors.E109.format(name=name)) from None
if doc is None:
raise ValueError(Errors.E005.format(name=name))
return doc
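Note the type change in this hunk: a KeyError escaping a pipeline component is re-raised as a ValueError. Without "from None" the traceback would show the stray KeyError first and the documented message second. A tiny self-contained sketch of the same shape (names invented, message paraphrased):

    def run_component(proc, doc, name, component_cfg):
        try:
            return proc(doc, **component_cfg.get(name, {}))
        except KeyError:
            # The KeyError is an internal detail of the component, not a
            # lookup the user performed, so convert it to the documented
            # error type and cut the chain.
            raise ValueError(f"[E109] Component '{name}' could not be run.") from None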

View File

@@ -131,7 +131,7 @@ cdef class Matcher:
for attr, _ in spec[1]:
self._seen_attrs.add(attr)
except (OverflowError, AttributeError):
-raise ValueError(Errors.E154.format())
+raise ValueError(Errors.E154.format()) from None
self._patterns.setdefault(key, [])
self._callbacks[key] = on_match
self._filter[key] = greedy

View File

@@ -85,7 +85,7 @@ class AttributeRuler(Pipe):
span=[t.text for t in span],
index=index,
)
-)
+) from None
set_token_attrs(token, attrs)
return doc

View File

@@ -195,7 +195,7 @@ class EntityLinker(Pipe):
types = set([type(eg) for eg in examples])
raise TypeError(
Errors.E978.format(name="EntityLinker", method="update", types=types)
-)
+) from None
if set_annotations:
# This seems simpler than other ways to get that exact output -- but
# it does run the model twice :(
@@ -213,7 +213,7 @@ class EntityLinker(Pipe):
sent_index = sentences.index(ent.sent)
except AttributeError:
# Catch the exception when ent.sent is None and provide a user-friendly warning
-raise RuntimeError(Errors.E030)
+raise RuntimeError(Errors.E030) from None
# get n previous sentences, if there are any
start_sentence = max(0, sent_index - self.n_sents)
# get n posterior sentences, or as many < n as there are
@@ -439,7 +439,7 @@ class EntityLinker(Pipe):
try:
self.model.from_bytes(p.open("rb").read())
except AttributeError:
-raise ValueError(Errors.E149)
+raise ValueError(Errors.E149) from None
def load_kb(p):
self.kb = KnowledgeBase(entity_vector_length=self.cfg["entity_width"])

View File

@@ -262,7 +262,7 @@ class Morphologizer(Tagger):
try:
self.model.from_bytes(b)
except AttributeError:
-raise ValueError(Errors.E149)
+raise ValueError(Errors.E149) from None
deserialize = {
"vocab": lambda b: self.vocab.from_bytes(b),
@@ -301,7 +301,7 @@ class Morphologizer(Tagger):
try:
self.model.from_bytes(file_.read())
except AttributeError:
-raise ValueError(Errors.E149)
+raise ValueError(Errors.E149) from None
deserialize = {
"vocab": lambda p: self.vocab.from_disk(p),

View File

@@ -211,7 +211,7 @@ class ClozeMultitask(Pipe):
predictions, bp_predictions = self.model.begin_update([eg.predicted for eg in examples])
except AttributeError:
types = set([type(eg) for eg in examples])
-raise TypeError(Errors.E978.format(name="ClozeMultitask", method="rehearse", types=types))
+raise TypeError(Errors.E978.format(name="ClozeMultitask", method="rehearse", types=types)) from None
loss, d_predictions = self.get_loss(examples, self.vocab.vectors.data, predictions)
bp_predictions(d_predictions)
if sgd is not None:

View File

@@ -204,7 +204,7 @@ cdef class Pipe:
try:
self.model.from_bytes(b)
except AttributeError:
-raise ValueError(Errors.E149)
+raise ValueError(Errors.E149) from None
deserialize = {}
if hasattr(self, "vocab"):
@@ -242,7 +242,7 @@ cdef class Pipe:
try:
self.model.from_bytes(p.open("rb").read())
except AttributeError:
-raise ValueError(Errors.E149)
+raise ValueError(Errors.E149) from None
deserialize = {}
deserialize["vocab"] = lambda p: self.vocab.from_disk(p)
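The same pair of hunks recurs in Morphologizer, SentenceRecognizer, Tagger and the Parser below: when the stored bytes don't match the model, from_bytes fails inside the model internals with an AttributeError, and spaCy replaces it with the documented E149 message about a config/model mismatch. A hedged sketch of the guard (simplified; the real code wires this into a deserialization table):

    from spacy.errors import Errors

    def load_model(model, b):
        try:
            model.from_bytes(b)
        except AttributeError:
            # The AttributeError points at model internals the user can't
            # act on; E149 states the real problem, that the saved data
            # doesn't match the component's configured architecture.
            raise ValueError(Errors.E149) from None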

View File

@@ -183,7 +183,7 @@ class SentenceRecognizer(Tagger):
try:
self.model.from_bytes(b)
except AttributeError:
-raise ValueError(Errors.E149)
+raise ValueError(Errors.E149) from None
deserialize = {
"vocab": lambda b: self.vocab.from_bytes(b),
@@ -222,7 +222,7 @@ class SentenceRecognizer(Tagger):
try:
self.model.from_bytes(file_.read())
except AttributeError:
-raise ValueError(Errors.E149)
+raise ValueError(Errors.E149) from None
deserialize = {
"vocab": lambda p: self.vocab.from_disk(p),

View File

@@ -195,7 +195,7 @@ class Tagger(Pipe):
return
except AttributeError:
types = set([type(eg) for eg in examples])
-raise TypeError(Errors.E978.format(name="Tagger", method="update", types=types))
+raise TypeError(Errors.E978.format(name="Tagger", method="update", types=types)) from None
set_dropout_rate(self.model, drop)
tag_scores, bp_tag_scores = self.model.begin_update(
[eg.predicted for eg in examples])
@@ -232,7 +232,7 @@ class Tagger(Pipe):
docs = [eg.predicted for eg in examples]
except AttributeError:
types = set([type(eg) for eg in examples])
-raise TypeError(Errors.E978.format(name="Tagger", method="rehearse", types=types))
+raise TypeError(Errors.E978.format(name="Tagger", method="rehearse", types=types)) from None
if self._rehearsal_model is None:
return
if not any(len(doc) for doc in docs):
@@ -292,7 +292,7 @@ class Tagger(Pipe):
try:
y = example.y
except AttributeError:
-raise TypeError(Errors.E978.format(name="Tagger", method="begin_training", types=type(example)))
+raise TypeError(Errors.E978.format(name="Tagger", method="begin_training", types=type(example))) from None
for token in y:
tag = token.tag_
if tag in orig_tag_map:
@@ -400,7 +400,7 @@ class Tagger(Pipe):
try:
self.model.from_bytes(b)
except AttributeError:
-raise ValueError(Errors.E149)
+raise ValueError(Errors.E149) from None
def load_tag_map(b):
tag_map = srsly.msgpack_loads(b)
@@ -456,7 +456,7 @@ class Tagger(Pipe):
try:
self.model.from_bytes(file_.read())
except AttributeError:
-raise ValueError(Errors.E149)
+raise ValueError(Errors.E149) from None
def load_tag_map(p):
tag_map = srsly.read_msgpack(p)

View File

@@ -203,7 +203,7 @@ class TextCategorizer(Pipe):
types = set([type(eg) for eg in examples])
raise TypeError(
Errors.E978.format(name="TextCategorizer", method="update", types=types)
-)
+) from None
set_dropout_rate(self.model, drop)
scores, bp_scores = self.model.begin_update([eg.predicted for eg in examples])
loss, d_scores = self.get_loss(examples, scores)
@@ -250,7 +250,7 @@ class TextCategorizer(Pipe):
err = Errors.E978.format(
name="TextCategorizer", method="rehearse", types=types
)
-raise TypeError(err)
+raise TypeError(err) from None
if not any(len(doc) for doc in docs):
# Handle cases where there are no tokens in any docs.
return losses
@@ -351,7 +351,7 @@ class TextCategorizer(Pipe):
err = Errors.E978.format(
name="TextCategorizer", method="update", types=type(example)
)
-raise TypeError(err)
+raise TypeError(err) from None
for cat in y.cats:
self.add_label(cat)
self.require_labels()

View File

@@ -473,7 +473,7 @@ cdef class Parser(Pipe):
self._resize()
self.model.from_bytes(bytes_data)
except AttributeError:
-raise ValueError(Errors.E149)
+raise ValueError(Errors.E149) from None
return self
def to_bytes(self, exclude=tuple()):
@@ -498,7 +498,7 @@ cdef class Parser(Pipe):
try:
self.model.from_bytes(msg['model'])
except AttributeError:
-raise ValueError(Errors.E149)
+raise ValueError(Errors.E149) from None
return self
def _init_gold_batch(self, examples, min_length=5, max_length=500):

View File

@@ -699,7 +699,7 @@ cdef class Doc:
for id_ in py_attr_ids]
except KeyError as msg:
keys = [k for k in IDS.keys() if not k.startswith("FLAG")]
-raise KeyError(Errors.E983.format(dict="IDS", key=msg, keys=keys))
+raise KeyError(Errors.E983.format(dict="IDS", key=msg, keys=keys)) from None
# Make an array from the attributes --- otherwise our inner loop is
# Python dict iteration.
cdef np.ndarray attr_ids = numpy.asarray(py_attr_ids, dtype="i")

View File

@@ -138,7 +138,7 @@ def get_lang_class(lang: str) -> "Language":
try:
module = importlib.import_module(f".lang.{lang}", "spacy")
except ImportError as err:
-raise ImportError(Errors.E048.format(lang=lang, err=err))
+raise ImportError(Errors.E048.format(lang=lang, err=err)) from err
set_lang_class(lang, getattr(module, module.__all__[0]))
return registry.languages.get(lang)
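This hunk is the one place in the commit that chains explicitly instead of suppressing: a language module usually fails to import because of a missing third-party dependency, and that underlying ImportError is exactly what the user needs to see, so it is kept attached as the direct cause with "from err". An illustrative sketch of the same pattern (names invented):

    import importlib

    def load_plugin(name):
        try:
            return importlib.import_module(name)
        except ImportError as err:
            # Keep the original error attached: it names the module that
            # actually failed to import, which may be a dependency of the
            # plugin rather than the plugin itself.
            raise ImportError(f"could not load plugin {name!r}") from err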
@@ -502,7 +502,7 @@ def run_command(command: Union[str, List[str]]) -> None:
except FileNotFoundError:
raise FileNotFoundError(
Errors.E970.format(str_command=" ".join(command), tool=command[0])
-)
+) from None
if status != 0:
sys.exit(status)
@@ -891,7 +891,7 @@ def get_words_and_spaces(
try:
word_start = text[text_pos:].index(word)
except ValueError:
-raise ValueError(Errors.E194.format(text=text, words=words))
+raise ValueError(Errors.E194.format(text=text, words=words)) from None
if word_start > 0:
text_words.append(text[text_pos : text_pos + word_start])
text_spaces.append(False)
@@ -918,7 +918,7 @@ def copy_config(config: Union[Dict[str, Any], Config]) -> Config:
try:
return Config(config).copy()
except ValueError:
-raise ValueError(Errors.E961.format(config=config))
+raise ValueError(Errors.E961.format(config=config)) from None
def deep_merge_configs(
@@ -1002,7 +1002,7 @@ def dot_to_object(config: Config, section: str):
try:
component = component[item]
except (KeyError, TypeError):
-raise KeyError(Errors.E952.format(name=section))
+raise KeyError(Errors.E952.format(name=section)) from None
return component
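For completeness, the attributes behind the syntax: "raise X from Y" sets X.__cause__ to Y and flips X.__suppress_context__ to True; the implicit X.__context__ is recorded by the raise machinery either way, so the original exception stays reachable programmatically even when "from None" hides it from the printed traceback. A quick illustrative check:

    try:
        try:
            {}["missing"]
        except KeyError:
            raise ValueError("friendly message") from None
    except ValueError as exc:
        assert exc.__cause__ is None             # set by "from None"
        assert exc.__suppress_context__ is True  # traceback chain hidden
        assert isinstance(exc.__context__, KeyError)  # but still stored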