Use "raise ... from" in custom errors for better tracebacks
This commit is contained in:
parent 5cc0d89fad
commit 56c17973aa
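The change itself is small but systematic: Python 3 exception chaining (PEP 3134) decides what the traceback shows when you re-raise inside an except block. A bare raise NewError(...) prints the original exception followed by "During handling of the above exception, another exception occurred"; raise ... from None suppresses that implicit context when the rewritten message already carries all the useful detail; raise ... from err keeps the original exception attached as the explicit cause. The sketch below illustrates both forms; the lookup table, messages, and helper names are invented for the example and are not taken from the diff.

import importlib

IDS = {"ORTH": 65, "LEMMA": 73}  # stand-in lookup table, for illustration only


def attr_to_id(name):
    try:
        return IDS[name.upper()]
    except KeyError as e:
        # The rewritten error already names the bad key and the valid keys, so
        # "from None" drops the redundant "During handling of ..." context.
        keys = list(IDS.keys())
        raise KeyError(f"Unknown attribute {e}; available keys: {keys}") from None


def load_lang_module(lang):
    try:
        return importlib.import_module(f".lang.{lang}", "spacy")
    except ImportError as err:
        # The underlying ImportError is genuinely useful here, so "from err"
        # keeps it attached to the new error as __cause__.
        raise ImportError(f"Can't import language {lang} from spacy.lang") from err

Most hunks below use from None because the custom error replaces the original; get_lang_class is the one place that uses from err, since the original ImportError carries the real cause.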
@@ -211,7 +211,7 @@ def create_evaluation_callback(
         except KeyError as e:
             keys = list(scores.keys())
             err = Errors.E983.format(dict="score_weights", key=str(e), keys=keys)
-            raise KeyError(err)
+            raise KeyError(err) from None
         return weighted_score, scores

     return evaluate

@@ -369,7 +369,7 @@ def setup_printer(
                 Errors.E983.format(
                     dict="scores (losses)", key=str(e), keys=list(info["losses"].keys())
                 )
-            )
+            ) from None

         try:
             scores = [

@@ -382,7 +382,7 @@ def setup_printer(
                     key=str(e),
                     keys=list(info["other_scores"].keys()),
                 )
-            )
+            ) from None
         data = (
             [info["epoch"], info["step"]]
             + losses
@@ -139,7 +139,7 @@ cdef class Example:

     def get_aligned_spans_y2x(self, y_spans):
         return self._get_aligned_spans(self.x, y_spans, self.alignment.y2x)

     def _get_aligned_spans(self, doc, spans, align):
         seen = set()
         output = []

@@ -207,7 +207,7 @@ cdef class Example:
         sent_starts and return a list of the new Examples"""
         if not self.reference.is_sentenced:
             return [self]

         align = self.alignment.y2x
         seen_indices = set()
         output = []
@@ -267,7 +267,7 @@ def _annot2array(vocab, tok_annot, doc_annot):
                 values.append([vocab.strings.add(v) for v in value])
             except TypeError:
                 types= set([type(v) for v in value])
-                raise TypeError(Errors.E969.format(field=key, types=types))
+                raise TypeError(Errors.E969.format(field=key, types=types)) from None

     array = numpy.asarray(values, dtype="uint64")
     return attrs, array.T
@@ -200,7 +200,7 @@ def try_sudachi_import(split_mode="A"):
             "(https://github.com/WorksApplications/SudachiPy). "
             "Install with `pip install sudachipy sudachidict_core` or "
             "install spaCy with `pip install spacy[ja]`."
-        )
+        ) from None


 def resolve_pos(orth, tag, next_tag):

@@ -263,7 +263,7 @@ def get_dtokens_and_spaces(dtokens, text, gap_tag="空白"):
         try:
             word_start = text[text_pos:].index(word)
         except ValueError:
-            raise ValueError(Errors.E194.format(text=text, words=words))
+            raise ValueError(Errors.E194.format(text=text, words=words)) from None

         # space token
         if word_start > 0:
@@ -85,7 +85,7 @@ def try_mecab_import() -> None:
             "Korean support requires [mecab-ko](https://bitbucket.org/eunjeon/mecab-ko/src/master/README.md), "
             "[mecab-ko-dic](https://bitbucket.org/eunjeon/mecab-ko-dic), "
             "and [natto-py](https://github.com/buruzaemon/natto-py)"
-        )
+        ) from None


 def check_spaces(text, tokens):
@@ -21,7 +21,7 @@ class RussianLemmatizer(Lemmatizer):
                 'try to fix it with "pip install pymorphy2==0.8" '
                 'or "pip install git+https://github.com/kmike/pymorphy2.git pymorphy2-dicts-uk"'
                 "if you need Ukrainian too"
-            )
+            ) from None
         if RussianLemmatizer._morph is None:
             RussianLemmatizer._morph = MorphAnalyzer()

@@ -31,7 +31,7 @@ class ThaiTokenizer(DummyTokenizer):
             raise ImportError(
                 "The Thai tokenizer requires the PyThaiNLP library: "
                 "https://github.com/PyThaiNLP/pythainlp"
-            )
+            ) from None
         self.word_tokenize = word_tokenize
         self.vocab = nlp.vocab

@@ -23,7 +23,7 @@ class UkrainianLemmatizer(Lemmatizer):
                 "The Ukrainian lemmatizer requires the pymorphy2 library and "
                 'dictionaries: try to fix it with "pip uninstall pymorphy2" and'
                 '"pip install git+https://github.com/kmike/pymorphy2.git pymorphy2-dicts-uk"'
-            )
+            ) from None

     def __call__(
         self, string: str, univ_pos: str, morphology: Optional[dict] = None
@@ -38,7 +38,7 @@ class VietnameseTokenizer(DummyTokenizer):
                     "Pyvi not installed. Either set use_pyvi = False, "
                     "or install it https://pypi.python.org/pypi/pyvi"
                 )
-                raise ImportError(msg)
+                raise ImportError(msg) from None

     def __call__(self, text: str) -> Doc:
         if self.use_pyvi:
@@ -129,7 +129,7 @@ class ChineseTokenizer(DummyTokenizer):
                         "pkuseg not installed: unable to reset pkuseg "
                         "user dict. Please " + _PKUSEG_INSTALL_MSG
                     )
-                    raise ImportError(msg)
+                    raise ImportError(msg) from None
             for word in words:
                 self.pkuseg_seg.preprocesser.insert(word.strip(), "")
         else:

@@ -208,7 +208,7 @@ class ChineseTokenizer(DummyTokenizer):
                     raise ImportError(
                         "pkuseg not installed. To use this model, "
                         + _PKUSEG_INSTALL_MSG
-                    )
+                    ) from None
                 self.pkuseg_seg = pkuseg.pkuseg(str(tempdir))
             if pkuseg_data["processors_data"]:
                 processors_data = pkuseg_data["processors_data"]

@@ -258,7 +258,7 @@ class ChineseTokenizer(DummyTokenizer):
                     raise ImportError(
                         "pkuseg not installed. To use this model, "
                         + _PKUSEG_INSTALL_MSG
-                    )
+                    ) from None
             if path.exists():
                 self.pkuseg_seg = pkuseg.pkuseg(path)

@@ -267,7 +267,7 @@ class ChineseTokenizer(DummyTokenizer):
                 import pkuseg
             except ImportError:
                 if self.segmenter == Segmenter.pkuseg:
-                    raise ImportError(self._pkuseg_install_msg)
+                    raise ImportError(self._pkuseg_install_msg) from None
             if self.segmenter == Segmenter.pkuseg:
                 data = srsly.read_msgpack(path)
                 (user_dict, do_process, common_words, other_words) = data
@@ -311,7 +311,7 @@ def try_jieba_import(segmenter: str) -> None:
                 "Jieba not installed. To use jieba, install it with `pip "
                 " install jieba` or from https://github.com/fxsjy/jieba"
             )
-            raise ImportError(msg)
+            raise ImportError(msg) from None


 def try_pkuseg_import(segmenter: str, pkuseg_model: str, pkuseg_user_dict: str) -> None:

@@ -332,11 +332,11 @@ def try_pkuseg_import(segmenter: str, pkuseg_model: str, pkuseg_user_dict: str)
     except ImportError:
         if segmenter == Segmenter.pkuseg:
             msg = "pkuseg not installed. To use pkuseg, " + _PKUSEG_INSTALL_MSG
-            raise ImportError(msg)
+            raise ImportError(msg) from None
     except FileNotFoundError:
         if segmenter == Segmenter.pkuseg:
             msg = "Unable to load pkuseg model from: " + pkuseg_model
-            raise FileNotFoundError(msg)
+            raise FileNotFoundError(msg) from None


 def _get_pkuseg_trie_data(node, path=""):
@@ -869,7 +869,7 @@ class Language:
             try:
                 doc = proc(doc, **component_cfg.get(name, {}))
             except KeyError:
-                raise ValueError(Errors.E109.format(name=name))
+                raise ValueError(Errors.E109.format(name=name)) from None
             if doc is None:
                 raise ValueError(Errors.E005.format(name=name))
         return doc
@@ -131,7 +131,7 @@ cdef class Matcher:
                     for attr, _ in spec[1]:
                         self._seen_attrs.add(attr)
             except OverflowError, AttributeError:
-                raise ValueError(Errors.E154.format())
+                raise ValueError(Errors.E154.format()) from None
         self._patterns.setdefault(key, [])
         self._callbacks[key] = on_match
         self._filter[key] = greedy
@@ -85,7 +85,7 @@ class AttributeRuler(Pipe):
                         span=[t.text for t in span],
                         index=index,
                     )
-                )
+                ) from None
             set_token_attrs(token, attrs)
         return doc

@@ -195,7 +195,7 @@ class EntityLinker(Pipe):
             types = set([type(eg) for eg in examples])
             raise TypeError(
                 Errors.E978.format(name="EntityLinker", method="update", types=types)
-            )
+            ) from None
         if set_annotations:
             # This seems simpler than other ways to get that exact output -- but
             # it does run the model twice :(

@@ -213,7 +213,7 @@ class EntityLinker(Pipe):
                         sent_index = sentences.index(ent.sent)
                     except AttributeError:
                         # Catch the exception when ent.sent is None and provide a user-friendly warning
-                        raise RuntimeError(Errors.E030)
+                        raise RuntimeError(Errors.E030) from None
                     # get n previous sentences, if there are any
                     start_sentence = max(0, sent_index - self.n_sents)
                     # get n posterior sentences, or as many < n as there are

@@ -439,7 +439,7 @@ class EntityLinker(Pipe):
             try:
                 self.model.from_bytes(p.open("rb").read())
             except AttributeError:
-                raise ValueError(Errors.E149)
+                raise ValueError(Errors.E149) from None

         def load_kb(p):
             self.kb = KnowledgeBase(entity_vector_length=self.cfg["entity_width"])
@@ -262,7 +262,7 @@ class Morphologizer(Tagger):
             try:
                 self.model.from_bytes(b)
             except AttributeError:
-                raise ValueError(Errors.E149)
+                raise ValueError(Errors.E149) from None

         deserialize = {
             "vocab": lambda b: self.vocab.from_bytes(b),

@@ -301,7 +301,7 @@ class Morphologizer(Tagger):
             try:
                 self.model.from_bytes(file_.read())
             except AttributeError:
-                raise ValueError(Errors.E149)
+                raise ValueError(Errors.E149) from None

         deserialize = {
             "vocab": lambda p: self.vocab.from_disk(p),
@@ -211,7 +211,7 @@ class ClozeMultitask(Pipe):
             predictions, bp_predictions = self.model.begin_update([eg.predicted for eg in examples])
         except AttributeError:
             types = set([type(eg) for eg in examples])
-            raise TypeError(Errors.E978.format(name="ClozeMultitask", method="rehearse", types=types))
+            raise TypeError(Errors.E978.format(name="ClozeMultitask", method="rehearse", types=types)) from None
         loss, d_predictions = self.get_loss(examples, self.vocab.vectors.data, predictions)
         bp_predictions(d_predictions)
         if sgd is not None:
@@ -204,7 +204,7 @@ cdef class Pipe:
             try:
                 self.model.from_bytes(b)
             except AttributeError:
-                raise ValueError(Errors.E149)
+                raise ValueError(Errors.E149) from None

         deserialize = {}
         if hasattr(self, "vocab"):

@@ -242,7 +242,7 @@ cdef class Pipe:
             try:
                 self.model.from_bytes(p.open("rb").read())
             except AttributeError:
-                raise ValueError(Errors.E149)
+                raise ValueError(Errors.E149) from None

         deserialize = {}
         deserialize["vocab"] = lambda p: self.vocab.from_disk(p)
@@ -183,7 +183,7 @@ class SentenceRecognizer(Tagger):
             try:
                 self.model.from_bytes(b)
             except AttributeError:
-                raise ValueError(Errors.E149)
+                raise ValueError(Errors.E149) from None

         deserialize = {
             "vocab": lambda b: self.vocab.from_bytes(b),

@@ -222,7 +222,7 @@ class SentenceRecognizer(Tagger):
             try:
                 self.model.from_bytes(file_.read())
             except AttributeError:
-                raise ValueError(Errors.E149)
+                raise ValueError(Errors.E149) from None

         deserialize = {
             "vocab": lambda p: self.vocab.from_disk(p),
@@ -195,7 +195,7 @@ class Tagger(Pipe):
                 return
         except AttributeError:
             types = set([type(eg) for eg in examples])
-            raise TypeError(Errors.E978.format(name="Tagger", method="update", types=types))
+            raise TypeError(Errors.E978.format(name="Tagger", method="update", types=types)) from None
         set_dropout_rate(self.model, drop)
         tag_scores, bp_tag_scores = self.model.begin_update(
             [eg.predicted for eg in examples])

@@ -232,7 +232,7 @@ class Tagger(Pipe):
             docs = [eg.predicted for eg in examples]
         except AttributeError:
             types = set([type(eg) for eg in examples])
-            raise TypeError(Errors.E978.format(name="Tagger", method="rehearse", types=types))
+            raise TypeError(Errors.E978.format(name="Tagger", method="rehearse", types=types)) from None
         if self._rehearsal_model is None:
             return
         if not any(len(doc) for doc in docs):

@@ -292,7 +292,7 @@ class Tagger(Pipe):
             try:
                 y = example.y
             except AttributeError:
-                raise TypeError(Errors.E978.format(name="Tagger", method="begin_training", types=type(example)))
+                raise TypeError(Errors.E978.format(name="Tagger", method="begin_training", types=type(example))) from None
             for token in y:
                 tag = token.tag_
                 if tag in orig_tag_map:

@@ -400,7 +400,7 @@ class Tagger(Pipe):
             try:
                 self.model.from_bytes(b)
             except AttributeError:
-                raise ValueError(Errors.E149)
+                raise ValueError(Errors.E149) from None

         def load_tag_map(b):
             tag_map = srsly.msgpack_loads(b)

@@ -456,7 +456,7 @@ class Tagger(Pipe):
             try:
                 self.model.from_bytes(file_.read())
             except AttributeError:
-                raise ValueError(Errors.E149)
+                raise ValueError(Errors.E149) from None

         def load_tag_map(p):
             tag_map = srsly.read_msgpack(p)
@@ -203,7 +203,7 @@ class TextCategorizer(Pipe):
             types = set([type(eg) for eg in examples])
             raise TypeError(
                 Errors.E978.format(name="TextCategorizer", method="update", types=types)
-            )
+            ) from None
         set_dropout_rate(self.model, drop)
         scores, bp_scores = self.model.begin_update([eg.predicted for eg in examples])
         loss, d_scores = self.get_loss(examples, scores)

@@ -250,7 +250,7 @@ class TextCategorizer(Pipe):
             err = Errors.E978.format(
                 name="TextCategorizer", method="rehearse", types=types
             )
-            raise TypeError(err)
+            raise TypeError(err) from None
         if not any(len(doc) for doc in docs):
             # Handle cases where there are no tokens in any docs.
             return losses

@@ -351,7 +351,7 @@ class TextCategorizer(Pipe):
                 err = Errors.E978.format(
                     name="TextCategorizer", method="update", types=type(example)
                 )
-                raise TypeError(err)
+                raise TypeError(err) from None
             for cat in y.cats:
                 self.add_label(cat)
         self.require_labels()
@@ -473,7 +473,7 @@ cdef class Parser(Pipe):
                 self._resize()
                 self.model.from_bytes(bytes_data)
             except AttributeError:
-                raise ValueError(Errors.E149)
+                raise ValueError(Errors.E149) from None
         return self

     def to_bytes(self, exclude=tuple()):

@@ -498,7 +498,7 @@ cdef class Parser(Pipe):
             try:
                 self.model.from_bytes(msg['model'])
             except AttributeError:
-                raise ValueError(Errors.E149)
+                raise ValueError(Errors.E149) from None
         return self

     def _init_gold_batch(self, examples, min_length=5, max_length=500):
@@ -699,7 +699,7 @@ cdef class Doc:
                            for id_ in py_attr_ids]
         except KeyError as msg:
             keys = [k for k in IDS.keys() if not k.startswith("FLAG")]
-            raise KeyError(Errors.E983.format(dict="IDS", key=msg, keys=keys))
+            raise KeyError(Errors.E983.format(dict="IDS", key=msg, keys=keys)) from None
         # Make an array from the attributes --- otherwise our inner loop is
         # Python dict iteration.
         cdef np.ndarray attr_ids = numpy.asarray(py_attr_ids, dtype="i")
@@ -138,7 +138,7 @@ def get_lang_class(lang: str) -> "Language":
         try:
             module = importlib.import_module(f".lang.{lang}", "spacy")
         except ImportError as err:
-            raise ImportError(Errors.E048.format(lang=lang, err=err))
+            raise ImportError(Errors.E048.format(lang=lang, err=err)) from err
         set_lang_class(lang, getattr(module, module.__all__[0]))
     return registry.languages.get(lang)

@@ -502,7 +502,7 @@ def run_command(command: Union[str, List[str]]) -> None:
     except FileNotFoundError:
         raise FileNotFoundError(
             Errors.E970.format(str_command=" ".join(command), tool=command[0])
-        )
+        ) from None
     if status != 0:
         sys.exit(status)

@@ -891,7 +891,7 @@ def get_words_and_spaces(
         try:
             word_start = text[text_pos:].index(word)
         except ValueError:
-            raise ValueError(Errors.E194.format(text=text, words=words))
+            raise ValueError(Errors.E194.format(text=text, words=words)) from None
         if word_start > 0:
             text_words.append(text[text_pos : text_pos + word_start])
             text_spaces.append(False)

@@ -918,7 +918,7 @@ def copy_config(config: Union[Dict[str, Any], Config]) -> Config:
     try:
         return Config(config).copy()
     except ValueError:
-        raise ValueError(Errors.E961.format(config=config))
+        raise ValueError(Errors.E961.format(config=config)) from None


 def deep_merge_configs(

@@ -1002,7 +1002,7 @@ def dot_to_object(config: Config, section: str):
         try:
             component = component[item]
         except (KeyError, TypeError):
-            raise KeyError(Errors.E952.format(name=section))
+            raise KeyError(Errors.E952.format(name=section)) from None
     return component