From 56c17973aa2526966eb93e2f26fc69c351dacf05 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Wed, 5 Aug 2020 23:53:21 +0200 Subject: [PATCH] Use "raise ... from" in custom errors for better tracebacks --- spacy/cli/train.py | 6 +++--- spacy/gold/example.pyx | 6 +++--- spacy/lang/ja/__init__.py | 4 ++-- spacy/lang/ko/__init__.py | 2 +- spacy/lang/ru/lemmatizer.py | 2 +- spacy/lang/th/__init__.py | 2 +- spacy/lang/uk/lemmatizer.py | 2 +- spacy/lang/vi/__init__.py | 2 +- spacy/lang/zh/__init__.py | 14 +++++++------- spacy/language.py | 2 +- spacy/matcher/matcher.pyx | 2 +- spacy/pipeline/attributeruler.py | 2 +- spacy/pipeline/entity_linker.py | 6 +++--- spacy/pipeline/morphologizer.pyx | 4 ++-- spacy/pipeline/multitask.pyx | 2 +- spacy/pipeline/pipe.pyx | 4 ++-- spacy/pipeline/senter.pyx | 4 ++-- spacy/pipeline/tagger.pyx | 10 +++++----- spacy/pipeline/textcat.py | 6 +++--- spacy/pipeline/transition_parser.pyx | 4 ++-- spacy/tokens/doc.pyx | 2 +- spacy/util.py | 10 +++++----- 22 files changed, 49 insertions(+), 49 deletions(-) diff --git a/spacy/cli/train.py b/spacy/cli/train.py index c5c6e7252..32d22d1bc 100644 --- a/spacy/cli/train.py +++ b/spacy/cli/train.py @@ -211,7 +211,7 @@ def create_evaluation_callback( except KeyError as e: keys = list(scores.keys()) err = Errors.E983.format(dict="score_weights", key=str(e), keys=keys) - raise KeyError(err) + raise KeyError(err) from None return weighted_score, scores return evaluate @@ -369,7 +369,7 @@ def setup_printer( Errors.E983.format( dict="scores (losses)", key=str(e), keys=list(info["losses"].keys()) ) - ) + ) from None try: scores = [ @@ -382,7 +382,7 @@ def setup_printer( key=str(e), keys=list(info["other_scores"].keys()), ) - ) + ) from None data = ( [info["epoch"], info["step"]] + losses diff --git a/spacy/gold/example.pyx b/spacy/gold/example.pyx index f90d98603..6093d2346 100644 --- a/spacy/gold/example.pyx +++ b/spacy/gold/example.pyx @@ -139,7 +139,7 @@ cdef class Example: def get_aligned_spans_y2x(self, y_spans): return self._get_aligned_spans(self.x, y_spans, self.alignment.y2x) - + def _get_aligned_spans(self, doc, spans, align): seen = set() output = [] @@ -207,7 +207,7 @@ cdef class Example: sent_starts and return a list of the new Examples""" if not self.reference.is_sentenced: return [self] - + align = self.alignment.y2x seen_indices = set() output = [] @@ -267,7 +267,7 @@ def _annot2array(vocab, tok_annot, doc_annot): values.append([vocab.strings.add(v) for v in value]) except TypeError: types= set([type(v) for v in value]) - raise TypeError(Errors.E969.format(field=key, types=types)) + raise TypeError(Errors.E969.format(field=key, types=types)) from None array = numpy.asarray(values, dtype="uint64") return attrs, array.T diff --git a/spacy/lang/ja/__init__.py b/spacy/lang/ja/__init__.py index d435afe12..900db4e4c 100644 --- a/spacy/lang/ja/__init__.py +++ b/spacy/lang/ja/__init__.py @@ -200,7 +200,7 @@ def try_sudachi_import(split_mode="A"): "(https://github.com/WorksApplications/SudachiPy). " "Install with `pip install sudachipy sudachidict_core` or " "install spaCy with `pip install spacy[ja]`." - ) + ) from None def resolve_pos(orth, tag, next_tag): @@ -263,7 +263,7 @@ def get_dtokens_and_spaces(dtokens, text, gap_tag="空白"): try: word_start = text[text_pos:].index(word) except ValueError: - raise ValueError(Errors.E194.format(text=text, words=words)) + raise ValueError(Errors.E194.format(text=text, words=words)) from None # space token if word_start > 0: diff --git a/spacy/lang/ko/__init__.py b/spacy/lang/ko/__init__.py index 6197ab927..f2954f461 100644 --- a/spacy/lang/ko/__init__.py +++ b/spacy/lang/ko/__init__.py @@ -85,7 +85,7 @@ def try_mecab_import() -> None: "Korean support requires [mecab-ko](https://bitbucket.org/eunjeon/mecab-ko/src/master/README.md), " "[mecab-ko-dic](https://bitbucket.org/eunjeon/mecab-ko-dic), " "and [natto-py](https://github.com/buruzaemon/natto-py)" - ) + ) from None def check_spaces(text, tokens): diff --git a/spacy/lang/ru/lemmatizer.py b/spacy/lang/ru/lemmatizer.py index a9a7ad80f..28767348d 100644 --- a/spacy/lang/ru/lemmatizer.py +++ b/spacy/lang/ru/lemmatizer.py @@ -21,7 +21,7 @@ class RussianLemmatizer(Lemmatizer): 'try to fix it with "pip install pymorphy2==0.8" ' 'or "pip install git+https://github.com/kmike/pymorphy2.git pymorphy2-dicts-uk"' "if you need Ukrainian too" - ) + ) from None if RussianLemmatizer._morph is None: RussianLemmatizer._morph = MorphAnalyzer() diff --git a/spacy/lang/th/__init__.py b/spacy/lang/th/__init__.py index 989c22a42..a35ae987f 100644 --- a/spacy/lang/th/__init__.py +++ b/spacy/lang/th/__init__.py @@ -31,7 +31,7 @@ class ThaiTokenizer(DummyTokenizer): raise ImportError( "The Thai tokenizer requires the PyThaiNLP library: " "https://github.com/PyThaiNLP/pythainlp" - ) + ) from None self.word_tokenize = word_tokenize self.vocab = nlp.vocab diff --git a/spacy/lang/uk/lemmatizer.py b/spacy/lang/uk/lemmatizer.py index de2d0c170..cf89d1a12 100644 --- a/spacy/lang/uk/lemmatizer.py +++ b/spacy/lang/uk/lemmatizer.py @@ -23,7 +23,7 @@ class UkrainianLemmatizer(Lemmatizer): "The Ukrainian lemmatizer requires the pymorphy2 library and " 'dictionaries: try to fix it with "pip uninstall pymorphy2" and' '"pip install git+https://github.com/kmike/pymorphy2.git pymorphy2-dicts-uk"' - ) + ) from None def __call__( self, string: str, univ_pos: str, morphology: Optional[dict] = None diff --git a/spacy/lang/vi/__init__.py b/spacy/lang/vi/__init__.py index 2b06d33f7..1db762adb 100644 --- a/spacy/lang/vi/__init__.py +++ b/spacy/lang/vi/__init__.py @@ -38,7 +38,7 @@ class VietnameseTokenizer(DummyTokenizer): "Pyvi not installed. Either set use_pyvi = False, " "or install it https://pypi.python.org/pypi/pyvi" ) - raise ImportError(msg) + raise ImportError(msg) from None def __call__(self, text: str) -> Doc: if self.use_pyvi: diff --git a/spacy/lang/zh/__init__.py b/spacy/lang/zh/__init__.py index fe0613c80..5d3bd2a96 100644 --- a/spacy/lang/zh/__init__.py +++ b/spacy/lang/zh/__init__.py @@ -129,7 +129,7 @@ class ChineseTokenizer(DummyTokenizer): "pkuseg not installed: unable to reset pkuseg " "user dict. Please " + _PKUSEG_INSTALL_MSG ) - raise ImportError(msg) + raise ImportError(msg) from None for word in words: self.pkuseg_seg.preprocesser.insert(word.strip(), "") else: @@ -208,7 +208,7 @@ class ChineseTokenizer(DummyTokenizer): raise ImportError( "pkuseg not installed. To use this model, " + _PKUSEG_INSTALL_MSG - ) + ) from None self.pkuseg_seg = pkuseg.pkuseg(str(tempdir)) if pkuseg_data["processors_data"]: processors_data = pkuseg_data["processors_data"] @@ -258,7 +258,7 @@ class ChineseTokenizer(DummyTokenizer): raise ImportError( "pkuseg not installed. To use this model, " + _PKUSEG_INSTALL_MSG - ) + ) from None if path.exists(): self.pkuseg_seg = pkuseg.pkuseg(path) @@ -267,7 +267,7 @@ class ChineseTokenizer(DummyTokenizer): import pkuseg except ImportError: if self.segmenter == Segmenter.pkuseg: - raise ImportError(self._pkuseg_install_msg) + raise ImportError(self._pkuseg_install_msg) from None if self.segmenter == Segmenter.pkuseg: data = srsly.read_msgpack(path) (user_dict, do_process, common_words, other_words) = data @@ -311,7 +311,7 @@ def try_jieba_import(segmenter: str) -> None: "Jieba not installed. To use jieba, install it with `pip " " install jieba` or from https://github.com/fxsjy/jieba" ) - raise ImportError(msg) + raise ImportError(msg) from None def try_pkuseg_import(segmenter: str, pkuseg_model: str, pkuseg_user_dict: str) -> None: @@ -332,11 +332,11 @@ def try_pkuseg_import(segmenter: str, pkuseg_model: str, pkuseg_user_dict: str) except ImportError: if segmenter == Segmenter.pkuseg: msg = "pkuseg not installed. To use pkuseg, " + _PKUSEG_INSTALL_MSG - raise ImportError(msg) + raise ImportError(msg) from None except FileNotFoundError: if segmenter == Segmenter.pkuseg: msg = "Unable to load pkuseg model from: " + pkuseg_model - raise FileNotFoundError(msg) + raise FileNotFoundError(msg) from None def _get_pkuseg_trie_data(node, path=""): diff --git a/spacy/language.py b/spacy/language.py index e9d7e9eb6..9018af73c 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -869,7 +869,7 @@ class Language: try: doc = proc(doc, **component_cfg.get(name, {})) except KeyError: - raise ValueError(Errors.E109.format(name=name)) + raise ValueError(Errors.E109.format(name=name)) from None if doc is None: raise ValueError(Errors.E005.format(name=name)) return doc diff --git a/spacy/matcher/matcher.pyx b/spacy/matcher/matcher.pyx index 325c81369..a0f3f1655 100644 --- a/spacy/matcher/matcher.pyx +++ b/spacy/matcher/matcher.pyx @@ -131,7 +131,7 @@ cdef class Matcher: for attr, _ in spec[1]: self._seen_attrs.add(attr) except OverflowError, AttributeError: - raise ValueError(Errors.E154.format()) + raise ValueError(Errors.E154.format()) from None self._patterns.setdefault(key, []) self._callbacks[key] = on_match self._filter[key] = greedy diff --git a/spacy/pipeline/attributeruler.py b/spacy/pipeline/attributeruler.py index 1f1e63959..d5abf7863 100644 --- a/spacy/pipeline/attributeruler.py +++ b/spacy/pipeline/attributeruler.py @@ -85,7 +85,7 @@ class AttributeRuler(Pipe): span=[t.text for t in span], index=index, ) - ) + ) from None set_token_attrs(token, attrs) return doc diff --git a/spacy/pipeline/entity_linker.py b/spacy/pipeline/entity_linker.py index 923d925dc..d922db1ad 100644 --- a/spacy/pipeline/entity_linker.py +++ b/spacy/pipeline/entity_linker.py @@ -195,7 +195,7 @@ class EntityLinker(Pipe): types = set([type(eg) for eg in examples]) raise TypeError( Errors.E978.format(name="EntityLinker", method="update", types=types) - ) + ) from None if set_annotations: # This seems simpler than other ways to get that exact output -- but # it does run the model twice :( @@ -213,7 +213,7 @@ class EntityLinker(Pipe): sent_index = sentences.index(ent.sent) except AttributeError: # Catch the exception when ent.sent is None and provide a user-friendly warning - raise RuntimeError(Errors.E030) + raise RuntimeError(Errors.E030) from None # get n previous sentences, if there are any start_sentence = max(0, sent_index - self.n_sents) # get n posterior sentences, or as many < n as there are @@ -439,7 +439,7 @@ class EntityLinker(Pipe): try: self.model.from_bytes(p.open("rb").read()) except AttributeError: - raise ValueError(Errors.E149) + raise ValueError(Errors.E149) from None def load_kb(p): self.kb = KnowledgeBase(entity_vector_length=self.cfg["entity_width"]) diff --git a/spacy/pipeline/morphologizer.pyx b/spacy/pipeline/morphologizer.pyx index 18673f85b..06c9f9a25 100644 --- a/spacy/pipeline/morphologizer.pyx +++ b/spacy/pipeline/morphologizer.pyx @@ -262,7 +262,7 @@ class Morphologizer(Tagger): try: self.model.from_bytes(b) except AttributeError: - raise ValueError(Errors.E149) + raise ValueError(Errors.E149) from None deserialize = { "vocab": lambda b: self.vocab.from_bytes(b), @@ -301,7 +301,7 @@ class Morphologizer(Tagger): try: self.model.from_bytes(file_.read()) except AttributeError: - raise ValueError(Errors.E149) + raise ValueError(Errors.E149) from None deserialize = { "vocab": lambda p: self.vocab.from_disk(p), diff --git a/spacy/pipeline/multitask.pyx b/spacy/pipeline/multitask.pyx index d85030adb..4b582045d 100644 --- a/spacy/pipeline/multitask.pyx +++ b/spacy/pipeline/multitask.pyx @@ -211,7 +211,7 @@ class ClozeMultitask(Pipe): predictions, bp_predictions = self.model.begin_update([eg.predicted for eg in examples]) except AttributeError: types = set([type(eg) for eg in examples]) - raise TypeError(Errors.E978.format(name="ClozeMultitask", method="rehearse", types=types)) + raise TypeError(Errors.E978.format(name="ClozeMultitask", method="rehearse", types=types)) from None loss, d_predictions = self.get_loss(examples, self.vocab.vectors.data, predictions) bp_predictions(d_predictions) if sgd is not None: diff --git a/spacy/pipeline/pipe.pyx b/spacy/pipeline/pipe.pyx index 1a94905a2..bed4cdd16 100644 --- a/spacy/pipeline/pipe.pyx +++ b/spacy/pipeline/pipe.pyx @@ -204,7 +204,7 @@ cdef class Pipe: try: self.model.from_bytes(b) except AttributeError: - raise ValueError(Errors.E149) + raise ValueError(Errors.E149) from None deserialize = {} if hasattr(self, "vocab"): @@ -242,7 +242,7 @@ cdef class Pipe: try: self.model.from_bytes(p.open("rb").read()) except AttributeError: - raise ValueError(Errors.E149) + raise ValueError(Errors.E149) from None deserialize = {} deserialize["vocab"] = lambda p: self.vocab.from_disk(p) diff --git a/spacy/pipeline/senter.pyx b/spacy/pipeline/senter.pyx index 620a8557e..3147cc902 100644 --- a/spacy/pipeline/senter.pyx +++ b/spacy/pipeline/senter.pyx @@ -183,7 +183,7 @@ class SentenceRecognizer(Tagger): try: self.model.from_bytes(b) except AttributeError: - raise ValueError(Errors.E149) + raise ValueError(Errors.E149) from None deserialize = { "vocab": lambda b: self.vocab.from_bytes(b), @@ -222,7 +222,7 @@ class SentenceRecognizer(Tagger): try: self.model.from_bytes(file_.read()) except AttributeError: - raise ValueError(Errors.E149) + raise ValueError(Errors.E149) from None deserialize = { "vocab": lambda p: self.vocab.from_disk(p), diff --git a/spacy/pipeline/tagger.pyx b/spacy/pipeline/tagger.pyx index 43f5b02cb..da1b3d3aa 100644 --- a/spacy/pipeline/tagger.pyx +++ b/spacy/pipeline/tagger.pyx @@ -195,7 +195,7 @@ class Tagger(Pipe): return except AttributeError: types = set([type(eg) for eg in examples]) - raise TypeError(Errors.E978.format(name="Tagger", method="update", types=types)) + raise TypeError(Errors.E978.format(name="Tagger", method="update", types=types)) from None set_dropout_rate(self.model, drop) tag_scores, bp_tag_scores = self.model.begin_update( [eg.predicted for eg in examples]) @@ -232,7 +232,7 @@ class Tagger(Pipe): docs = [eg.predicted for eg in examples] except AttributeError: types = set([type(eg) for eg in examples]) - raise TypeError(Errors.E978.format(name="Tagger", method="rehearse", types=types)) + raise TypeError(Errors.E978.format(name="Tagger", method="rehearse", types=types)) from None if self._rehearsal_model is None: return if not any(len(doc) for doc in docs): @@ -292,7 +292,7 @@ class Tagger(Pipe): try: y = example.y except AttributeError: - raise TypeError(Errors.E978.format(name="Tagger", method="begin_training", types=type(example))) + raise TypeError(Errors.E978.format(name="Tagger", method="begin_training", types=type(example))) from None for token in y: tag = token.tag_ if tag in orig_tag_map: @@ -400,7 +400,7 @@ class Tagger(Pipe): try: self.model.from_bytes(b) except AttributeError: - raise ValueError(Errors.E149) + raise ValueError(Errors.E149) from None def load_tag_map(b): tag_map = srsly.msgpack_loads(b) @@ -456,7 +456,7 @@ class Tagger(Pipe): try: self.model.from_bytes(file_.read()) except AttributeError: - raise ValueError(Errors.E149) + raise ValueError(Errors.E149) from None def load_tag_map(p): tag_map = srsly.read_msgpack(p) diff --git a/spacy/pipeline/textcat.py b/spacy/pipeline/textcat.py index bc16e790f..8b46082cb 100644 --- a/spacy/pipeline/textcat.py +++ b/spacy/pipeline/textcat.py @@ -203,7 +203,7 @@ class TextCategorizer(Pipe): types = set([type(eg) for eg in examples]) raise TypeError( Errors.E978.format(name="TextCategorizer", method="update", types=types) - ) + ) from None set_dropout_rate(self.model, drop) scores, bp_scores = self.model.begin_update([eg.predicted for eg in examples]) loss, d_scores = self.get_loss(examples, scores) @@ -250,7 +250,7 @@ class TextCategorizer(Pipe): err = Errors.E978.format( name="TextCategorizer", method="rehearse", types=types ) - raise TypeError(err) + raise TypeError(err) from None if not any(len(doc) for doc in docs): # Handle cases where there are no tokens in any docs. return losses @@ -351,7 +351,7 @@ class TextCategorizer(Pipe): err = Errors.E978.format( name="TextCategorizer", method="update", types=type(example) ) - raise TypeError(err) + raise TypeError(err) from None for cat in y.cats: self.add_label(cat) self.require_labels() diff --git a/spacy/pipeline/transition_parser.pyx b/spacy/pipeline/transition_parser.pyx index b14a55cb4..9829e764d 100644 --- a/spacy/pipeline/transition_parser.pyx +++ b/spacy/pipeline/transition_parser.pyx @@ -473,7 +473,7 @@ cdef class Parser(Pipe): self._resize() self.model.from_bytes(bytes_data) except AttributeError: - raise ValueError(Errors.E149) + raise ValueError(Errors.E149) from None return self def to_bytes(self, exclude=tuple()): @@ -498,7 +498,7 @@ cdef class Parser(Pipe): try: self.model.from_bytes(msg['model']) except AttributeError: - raise ValueError(Errors.E149) + raise ValueError(Errors.E149) from None return self def _init_gold_batch(self, examples, min_length=5, max_length=500): diff --git a/spacy/tokens/doc.pyx b/spacy/tokens/doc.pyx index 2fcc0983b..935af88d1 100644 --- a/spacy/tokens/doc.pyx +++ b/spacy/tokens/doc.pyx @@ -699,7 +699,7 @@ cdef class Doc: for id_ in py_attr_ids] except KeyError as msg: keys = [k for k in IDS.keys() if not k.startswith("FLAG")] - raise KeyError(Errors.E983.format(dict="IDS", key=msg, keys=keys)) + raise KeyError(Errors.E983.format(dict="IDS", key=msg, keys=keys)) from None # Make an array from the attributes --- otherwise our inner loop is # Python dict iteration. cdef np.ndarray attr_ids = numpy.asarray(py_attr_ids, dtype="i") diff --git a/spacy/util.py b/spacy/util.py index 52073097e..05f8ef017 100644 --- a/spacy/util.py +++ b/spacy/util.py @@ -138,7 +138,7 @@ def get_lang_class(lang: str) -> "Language": try: module = importlib.import_module(f".lang.{lang}", "spacy") except ImportError as err: - raise ImportError(Errors.E048.format(lang=lang, err=err)) + raise ImportError(Errors.E048.format(lang=lang, err=err)) from err set_lang_class(lang, getattr(module, module.__all__[0])) return registry.languages.get(lang) @@ -502,7 +502,7 @@ def run_command(command: Union[str, List[str]]) -> None: except FileNotFoundError: raise FileNotFoundError( Errors.E970.format(str_command=" ".join(command), tool=command[0]) - ) + ) from None if status != 0: sys.exit(status) @@ -891,7 +891,7 @@ def get_words_and_spaces( try: word_start = text[text_pos:].index(word) except ValueError: - raise ValueError(Errors.E194.format(text=text, words=words)) + raise ValueError(Errors.E194.format(text=text, words=words)) from None if word_start > 0: text_words.append(text[text_pos : text_pos + word_start]) text_spaces.append(False) @@ -918,7 +918,7 @@ def copy_config(config: Union[Dict[str, Any], Config]) -> Config: try: return Config(config).copy() except ValueError: - raise ValueError(Errors.E961.format(config=config)) + raise ValueError(Errors.E961.format(config=config)) from None def deep_merge_configs( @@ -1002,7 +1002,7 @@ def dot_to_object(config: Config, section: str): try: component = component[item] except (KeyError, TypeError): - raise KeyError(Errors.E952.format(name=section)) + raise KeyError(Errors.E952.format(name=section)) from None return component