Use "raise ... from" in custom errors for better tracebacks
This commit is contained in:
parent 5cc0d89fad
commit 56c17973aa
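The pattern applied throughout this commit is to re-raise spaCy's custom errors with `raise ... from None` (or `from err`) inside `except` blocks, so the user-facing message is not buried under Python's implicit exception chaining. As a rough illustration only (a minimal sketch with a made-up `lookup` helper, not code from this commit):

```python
# Sketch of why "raise ... from None" gives cleaner tracebacks.
# Without it, re-raising inside an except block prints the original error plus
# "During handling of the above exception, another exception occurred:",
# which hides the friendly message behind the internal one.

def lookup(scores: dict, key: str) -> float:
    try:
        return scores[key]
    except KeyError as e:
        # "from None" suppresses the implicit chain: only this error is shown.
        raise KeyError(f"Unknown score {e}; available: {list(scores)}") from None


lookup({"tag_acc": 0.9}, "las")  # traceback shows only the custom KeyError
```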
@@ -211,7 +211,7 @@ def create_evaluation_callback(
         except KeyError as e:
             keys = list(scores.keys())
             err = Errors.E983.format(dict="score_weights", key=str(e), keys=keys)
-            raise KeyError(err)
+            raise KeyError(err) from None
         return weighted_score, scores

     return evaluate
@@ -369,7 +369,7 @@ def setup_printer(
                 Errors.E983.format(
                     dict="scores (losses)", key=str(e), keys=list(info["losses"].keys())
                 )
-            )
+            ) from None

         try:
             scores = [
@@ -382,7 +382,7 @@ def setup_printer(
                     key=str(e),
                     keys=list(info["other_scores"].keys()),
                 )
-            )
+            ) from None
         data = (
             [info["epoch"], info["step"]]
             + losses
@@ -139,7 +139,7 @@ cdef class Example:

     def get_aligned_spans_y2x(self, y_spans):
         return self._get_aligned_spans(self.x, y_spans, self.alignment.y2x)


     def _get_aligned_spans(self, doc, spans, align):
         seen = set()
         output = []
@@ -207,7 +207,7 @@ cdef class Example:
         sent_starts and return a list of the new Examples"""
         if not self.reference.is_sentenced:
             return [self]


         align = self.alignment.y2x
         seen_indices = set()
         output = []
@@ -267,7 +267,7 @@ def _annot2array(vocab, tok_annot, doc_annot):
                 values.append([vocab.strings.add(v) for v in value])
             except TypeError:
                 types= set([type(v) for v in value])
-                raise TypeError(Errors.E969.format(field=key, types=types))
+                raise TypeError(Errors.E969.format(field=key, types=types)) from None

     array = numpy.asarray(values, dtype="uint64")
     return attrs, array.T
@@ -200,7 +200,7 @@ def try_sudachi_import(split_mode="A"):
             "(https://github.com/WorksApplications/SudachiPy). "
             "Install with `pip install sudachipy sudachidict_core` or "
             "install spaCy with `pip install spacy[ja]`."
-        )
+        ) from None


 def resolve_pos(orth, tag, next_tag):
@@ -263,7 +263,7 @@ def get_dtokens_and_spaces(dtokens, text, gap_tag="空白"):
         try:
             word_start = text[text_pos:].index(word)
         except ValueError:
-            raise ValueError(Errors.E194.format(text=text, words=words))
+            raise ValueError(Errors.E194.format(text=text, words=words)) from None

         # space token
         if word_start > 0:
@@ -85,7 +85,7 @@ def try_mecab_import() -> None:
             "Korean support requires [mecab-ko](https://bitbucket.org/eunjeon/mecab-ko/src/master/README.md), "
             "[mecab-ko-dic](https://bitbucket.org/eunjeon/mecab-ko-dic), "
             "and [natto-py](https://github.com/buruzaemon/natto-py)"
-        )
+        ) from None


 def check_spaces(text, tokens):
@@ -21,7 +21,7 @@ class RussianLemmatizer(Lemmatizer):
                 'try to fix it with "pip install pymorphy2==0.8" '
                 'or "pip install git+https://github.com/kmike/pymorphy2.git pymorphy2-dicts-uk"'
                 "if you need Ukrainian too"
-            )
+            ) from None
         if RussianLemmatizer._morph is None:
             RussianLemmatizer._morph = MorphAnalyzer()

@@ -31,7 +31,7 @@ class ThaiTokenizer(DummyTokenizer):
             raise ImportError(
                 "The Thai tokenizer requires the PyThaiNLP library: "
                 "https://github.com/PyThaiNLP/pythainlp"
-            )
+            ) from None
         self.word_tokenize = word_tokenize
         self.vocab = nlp.vocab

@@ -23,7 +23,7 @@ class UkrainianLemmatizer(Lemmatizer):
                 "The Ukrainian lemmatizer requires the pymorphy2 library and "
                 'dictionaries: try to fix it with "pip uninstall pymorphy2" and'
                 '"pip install git+https://github.com/kmike/pymorphy2.git pymorphy2-dicts-uk"'
-            )
+            ) from None

     def __call__(
         self, string: str, univ_pos: str, morphology: Optional[dict] = None
@@ -38,7 +38,7 @@ class VietnameseTokenizer(DummyTokenizer):
                     "Pyvi not installed. Either set use_pyvi = False, "
                     "or install it https://pypi.python.org/pypi/pyvi"
                 )
-                raise ImportError(msg)
+                raise ImportError(msg) from None

     def __call__(self, text: str) -> Doc:
         if self.use_pyvi:
@@ -129,7 +129,7 @@ class ChineseTokenizer(DummyTokenizer):
                             "pkuseg not installed: unable to reset pkuseg "
                             "user dict. Please " + _PKUSEG_INSTALL_MSG
                         )
-                        raise ImportError(msg)
+                        raise ImportError(msg) from None
             for word in words:
                 self.pkuseg_seg.preprocesser.insert(word.strip(), "")
         else:
@@ -208,7 +208,7 @@ class ChineseTokenizer(DummyTokenizer):
                     raise ImportError(
                         "pkuseg not installed. To use this model, "
                         + _PKUSEG_INSTALL_MSG
-                    )
+                    ) from None
                 self.pkuseg_seg = pkuseg.pkuseg(str(tempdir))
             if pkuseg_data["processors_data"]:
                 processors_data = pkuseg_data["processors_data"]
@@ -258,7 +258,7 @@ class ChineseTokenizer(DummyTokenizer):
                     raise ImportError(
                         "pkuseg not installed. To use this model, "
                         + _PKUSEG_INSTALL_MSG
-                    )
+                    ) from None
             if path.exists():
                 self.pkuseg_seg = pkuseg.pkuseg(path)

@@ -267,7 +267,7 @@ class ChineseTokenizer(DummyTokenizer):
                 import pkuseg
             except ImportError:
                 if self.segmenter == Segmenter.pkuseg:
-                    raise ImportError(self._pkuseg_install_msg)
+                    raise ImportError(self._pkuseg_install_msg) from None
             if self.segmenter == Segmenter.pkuseg:
                 data = srsly.read_msgpack(path)
                 (user_dict, do_process, common_words, other_words) = data
@@ -311,7 +311,7 @@ def try_jieba_import(segmenter: str) -> None:
                 "Jieba not installed. To use jieba, install it with `pip "
                 " install jieba` or from https://github.com/fxsjy/jieba"
             )
-            raise ImportError(msg)
+            raise ImportError(msg) from None


 def try_pkuseg_import(segmenter: str, pkuseg_model: str, pkuseg_user_dict: str) -> None:
@@ -332,11 +332,11 @@ def try_pkuseg_import(segmenter: str, pkuseg_model: str, pkuseg_user_dict: str)
     except ImportError:
         if segmenter == Segmenter.pkuseg:
             msg = "pkuseg not installed. To use pkuseg, " + _PKUSEG_INSTALL_MSG
-            raise ImportError(msg)
+            raise ImportError(msg) from None
     except FileNotFoundError:
         if segmenter == Segmenter.pkuseg:
             msg = "Unable to load pkuseg model from: " + pkuseg_model
-            raise FileNotFoundError(msg)
+            raise FileNotFoundError(msg) from None


 def _get_pkuseg_trie_data(node, path=""):
@@ -869,7 +869,7 @@ class Language:
             try:
                 doc = proc(doc, **component_cfg.get(name, {}))
             except KeyError:
-                raise ValueError(Errors.E109.format(name=name))
+                raise ValueError(Errors.E109.format(name=name)) from None
             if doc is None:
                 raise ValueError(Errors.E005.format(name=name))
         return doc
@@ -131,7 +131,7 @@ cdef class Matcher:
                     for attr, _ in spec[1]:
                         self._seen_attrs.add(attr)
             except OverflowError, AttributeError:
-                raise ValueError(Errors.E154.format())
+                raise ValueError(Errors.E154.format()) from None
         self._patterns.setdefault(key, [])
         self._callbacks[key] = on_match
         self._filter[key] = greedy
@@ -85,7 +85,7 @@ class AttributeRuler(Pipe):
                             span=[t.text for t in span],
                             index=index,
                         )
-                    )
+                    ) from None
                 set_token_attrs(token, attrs)
         return doc

@@ -195,7 +195,7 @@ class EntityLinker(Pipe):
             types = set([type(eg) for eg in examples])
             raise TypeError(
                 Errors.E978.format(name="EntityLinker", method="update", types=types)
-            )
+            ) from None
         if set_annotations:
             # This seems simpler than other ways to get that exact output -- but
             # it does run the model twice :(
@@ -213,7 +213,7 @@ class EntityLinker(Pipe):
                     sent_index = sentences.index(ent.sent)
                 except AttributeError:
                     # Catch the exception when ent.sent is None and provide a user-friendly warning
-                    raise RuntimeError(Errors.E030)
+                    raise RuntimeError(Errors.E030) from None
                 # get n previous sentences, if there are any
                 start_sentence = max(0, sent_index - self.n_sents)
                 # get n posterior sentences, or as many < n as there are
@@ -439,7 +439,7 @@ class EntityLinker(Pipe):
             try:
                 self.model.from_bytes(p.open("rb").read())
             except AttributeError:
-                raise ValueError(Errors.E149)
+                raise ValueError(Errors.E149) from None

         def load_kb(p):
             self.kb = KnowledgeBase(entity_vector_length=self.cfg["entity_width"])
@@ -262,7 +262,7 @@ class Morphologizer(Tagger):
             try:
                 self.model.from_bytes(b)
             except AttributeError:
-                raise ValueError(Errors.E149)
+                raise ValueError(Errors.E149) from None

         deserialize = {
             "vocab": lambda b: self.vocab.from_bytes(b),
@@ -301,7 +301,7 @@ class Morphologizer(Tagger):
             try:
                 self.model.from_bytes(file_.read())
             except AttributeError:
-                raise ValueError(Errors.E149)
+                raise ValueError(Errors.E149) from None

         deserialize = {
             "vocab": lambda p: self.vocab.from_disk(p),
@@ -211,7 +211,7 @@ class ClozeMultitask(Pipe):
             predictions, bp_predictions = self.model.begin_update([eg.predicted for eg in examples])
         except AttributeError:
             types = set([type(eg) for eg in examples])
-            raise TypeError(Errors.E978.format(name="ClozeMultitask", method="rehearse", types=types))
+            raise TypeError(Errors.E978.format(name="ClozeMultitask", method="rehearse", types=types)) from None
         loss, d_predictions = self.get_loss(examples, self.vocab.vectors.data, predictions)
         bp_predictions(d_predictions)
         if sgd is not None:
@@ -204,7 +204,7 @@ cdef class Pipe:
             try:
                 self.model.from_bytes(b)
             except AttributeError:
-                raise ValueError(Errors.E149)
+                raise ValueError(Errors.E149) from None

         deserialize = {}
         if hasattr(self, "vocab"):
@@ -242,7 +242,7 @@ cdef class Pipe:
             try:
                 self.model.from_bytes(p.open("rb").read())
             except AttributeError:
-                raise ValueError(Errors.E149)
+                raise ValueError(Errors.E149) from None

         deserialize = {}
         deserialize["vocab"] = lambda p: self.vocab.from_disk(p)
@@ -183,7 +183,7 @@ class SentenceRecognizer(Tagger):
             try:
                 self.model.from_bytes(b)
             except AttributeError:
-                raise ValueError(Errors.E149)
+                raise ValueError(Errors.E149) from None

         deserialize = {
             "vocab": lambda b: self.vocab.from_bytes(b),
@@ -222,7 +222,7 @@ class SentenceRecognizer(Tagger):
             try:
                 self.model.from_bytes(file_.read())
             except AttributeError:
-                raise ValueError(Errors.E149)
+                raise ValueError(Errors.E149) from None

         deserialize = {
             "vocab": lambda p: self.vocab.from_disk(p),
@@ -195,7 +195,7 @@ class Tagger(Pipe):
                 return
         except AttributeError:
             types = set([type(eg) for eg in examples])
-            raise TypeError(Errors.E978.format(name="Tagger", method="update", types=types))
+            raise TypeError(Errors.E978.format(name="Tagger", method="update", types=types)) from None
         set_dropout_rate(self.model, drop)
         tag_scores, bp_tag_scores = self.model.begin_update(
             [eg.predicted for eg in examples])
@@ -232,7 +232,7 @@ class Tagger(Pipe):
             docs = [eg.predicted for eg in examples]
         except AttributeError:
             types = set([type(eg) for eg in examples])
-            raise TypeError(Errors.E978.format(name="Tagger", method="rehearse", types=types))
+            raise TypeError(Errors.E978.format(name="Tagger", method="rehearse", types=types)) from None
         if self._rehearsal_model is None:
             return
         if not any(len(doc) for doc in docs):
@@ -292,7 +292,7 @@ class Tagger(Pipe):
             try:
                 y = example.y
             except AttributeError:
-                raise TypeError(Errors.E978.format(name="Tagger", method="begin_training", types=type(example)))
+                raise TypeError(Errors.E978.format(name="Tagger", method="begin_training", types=type(example))) from None
             for token in y:
                 tag = token.tag_
                 if tag in orig_tag_map:
@@ -400,7 +400,7 @@ class Tagger(Pipe):
             try:
                 self.model.from_bytes(b)
             except AttributeError:
-                raise ValueError(Errors.E149)
+                raise ValueError(Errors.E149) from None

         def load_tag_map(b):
             tag_map = srsly.msgpack_loads(b)
@@ -456,7 +456,7 @@ class Tagger(Pipe):
             try:
                 self.model.from_bytes(file_.read())
             except AttributeError:
-                raise ValueError(Errors.E149)
+                raise ValueError(Errors.E149) from None

         def load_tag_map(p):
             tag_map = srsly.read_msgpack(p)
@@ -203,7 +203,7 @@ class TextCategorizer(Pipe):
             types = set([type(eg) for eg in examples])
             raise TypeError(
                 Errors.E978.format(name="TextCategorizer", method="update", types=types)
-            )
+            ) from None
         set_dropout_rate(self.model, drop)
         scores, bp_scores = self.model.begin_update([eg.predicted for eg in examples])
         loss, d_scores = self.get_loss(examples, scores)
@@ -250,7 +250,7 @@ class TextCategorizer(Pipe):
             err = Errors.E978.format(
                 name="TextCategorizer", method="rehearse", types=types
             )
-            raise TypeError(err)
+            raise TypeError(err) from None
         if not any(len(doc) for doc in docs):
             # Handle cases where there are no tokens in any docs.
             return losses
@@ -351,7 +351,7 @@ class TextCategorizer(Pipe):
                 err = Errors.E978.format(
                     name="TextCategorizer", method="update", types=type(example)
                 )
-                raise TypeError(err)
+                raise TypeError(err) from None
             for cat in y.cats:
                 self.add_label(cat)
         self.require_labels()
@@ -473,7 +473,7 @@ cdef class Parser(Pipe):
                 self._resize()
                 self.model.from_bytes(bytes_data)
             except AttributeError:
-                raise ValueError(Errors.E149)
+                raise ValueError(Errors.E149) from None
         return self

     def to_bytes(self, exclude=tuple()):
@@ -498,7 +498,7 @@ cdef class Parser(Pipe):
             try:
                 self.model.from_bytes(msg['model'])
             except AttributeError:
-                raise ValueError(Errors.E149)
+                raise ValueError(Errors.E149) from None
         return self

     def _init_gold_batch(self, examples, min_length=5, max_length=500):
@@ -699,7 +699,7 @@ cdef class Doc:
                            for id_ in py_attr_ids]
         except KeyError as msg:
             keys = [k for k in IDS.keys() if not k.startswith("FLAG")]
-            raise KeyError(Errors.E983.format(dict="IDS", key=msg, keys=keys))
+            raise KeyError(Errors.E983.format(dict="IDS", key=msg, keys=keys)) from None
         # Make an array from the attributes --- otherwise our inner loop is
         # Python dict iteration.
         cdef np.ndarray attr_ids = numpy.asarray(py_attr_ids, dtype="i")
@@ -138,7 +138,7 @@ def get_lang_class(lang: str) -> "Language":
         try:
             module = importlib.import_module(f".lang.{lang}", "spacy")
         except ImportError as err:
-            raise ImportError(Errors.E048.format(lang=lang, err=err))
+            raise ImportError(Errors.E048.format(lang=lang, err=err)) from err
         set_lang_class(lang, getattr(module, module.__all__[0]))
     return registry.languages.get(lang)
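Unlike the other hunks, `get_lang_class` chains with `from err` instead of `from None`: the original `ImportError` stays attached as `__cause__`, so the underlying import failure still appears in the traceback above the spaCy error. A minimal sketch of that variant, with a hypothetical `load_plugin` helper rather than spaCy code:

```python
# Sketch of "raise ... from err": explicit chaining keeps the root cause
# visible ("The above exception was the direct cause of the following ...").
import importlib


def load_plugin(name: str):
    try:
        return importlib.import_module(name)
    except ImportError as err:
        # The original ImportError becomes __cause__ of the new one.
        raise ImportError(f"Could not load plugin {name!r}") from err


# load_plugin("no_such_module")  # traceback shows both errors, root cause first
```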
@@ -502,7 +502,7 @@ def run_command(command: Union[str, List[str]]) -> None:
     except FileNotFoundError:
         raise FileNotFoundError(
             Errors.E970.format(str_command=" ".join(command), tool=command[0])
-        )
+        ) from None
     if status != 0:
         sys.exit(status)

@@ -891,7 +891,7 @@ def get_words_and_spaces(
         try:
             word_start = text[text_pos:].index(word)
         except ValueError:
-            raise ValueError(Errors.E194.format(text=text, words=words))
+            raise ValueError(Errors.E194.format(text=text, words=words)) from None
         if word_start > 0:
             text_words.append(text[text_pos : text_pos + word_start])
             text_spaces.append(False)
@@ -918,7 +918,7 @@ def copy_config(config: Union[Dict[str, Any], Config]) -> Config:
     try:
         return Config(config).copy()
     except ValueError:
-        raise ValueError(Errors.E961.format(config=config))
+        raise ValueError(Errors.E961.format(config=config)) from None


 def deep_merge_configs(
@@ -1002,7 +1002,7 @@ def dot_to_object(config: Config, section: str):
         try:
             component = component[item]
         except (KeyError, TypeError):
-            raise KeyError(Errors.E952.format(name=section))
+            raise KeyError(Errors.E952.format(name=section)) from None
     return component
