mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 09:56:28 +03:00
custom error and warning messages
This commit is contained in:
parent
791327e3c5
commit
cc9ae28a52
|
@ -324,18 +324,19 @@ def read_training(nlp, training_dir, dev, limit):
|
||||||
if 5 < sent_length < 100:
|
if 5 < sent_length < 100:
|
||||||
ents_by_offset[str(ent.start_char) + "_" + str(ent.end_char)] = ent
|
ents_by_offset[str(ent.start_char) + "_" + str(ent.end_char)] = ent
|
||||||
else:
|
else:
|
||||||
skip_articles.add(current_article_id)
|
skip_articles.add(article_id)
|
||||||
current_doc = None
|
current_doc = None
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("Problem parsing article", article_id, e)
|
print("Problem parsing article", article_id, e)
|
||||||
skip_articles.add(current_article_id)
|
skip_articles.add(article_id)
|
||||||
|
raise e
|
||||||
|
|
||||||
# repeat checking this condition in case an exception was thrown
|
# repeat checking this condition in case an exception was thrown
|
||||||
if current_doc and (current_article_id == article_id):
|
if current_doc and (current_article_id == article_id):
|
||||||
found_ent = ents_by_offset.get(start + "_" + end, None)
|
found_ent = ents_by_offset.get(start + "_" + end, None)
|
||||||
if found_ent:
|
if found_ent:
|
||||||
if found_ent.text != alias:
|
if found_ent.text != alias:
|
||||||
skip_articles.add(current_article_id)
|
skip_articles.add(article_id)
|
||||||
current_doc = None
|
current_doc = None
|
||||||
else:
|
else:
|
||||||
sent = found_ent.sent.as_doc()
|
sent = found_ent.sent.as_doc()
|
||||||
|
|
|
@ -399,6 +399,9 @@ class Errors(object):
|
||||||
E138 = ("Invalid JSONL format for raw text '{text}'. Make sure the input includes either the "
|
E138 = ("Invalid JSONL format for raw text '{text}'. Make sure the input includes either the "
|
||||||
"`text` or `tokens` key. For more info, see the docs:\n"
|
"`text` or `tokens` key. For more info, see the docs:\n"
|
||||||
"https://spacy.io/api/cli#pretrain-jsonl")
|
"https://spacy.io/api/cli#pretrain-jsonl")
|
||||||
|
E139 = ("Knowledge base for component '{name}' not initialized. Did you forget to call set_kb()?")
|
||||||
|
E140 = ("The list of entities, prior probabilities and entity vectors should be of equal length.")
|
||||||
|
E141 = ("Entity vectors should be of length {required} instead of the provided {found}.")
|
||||||
|
|
||||||
|
|
||||||
@add_codes
|
@add_codes
|
||||||
|
|
16
spacy/kb.pyx
16
spacy/kb.pyx
|
@ -106,9 +106,9 @@ cdef class KnowledgeBase:
|
||||||
user_warning(Warnings.W018.format(entity=entity))
|
user_warning(Warnings.W018.format(entity=entity))
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# Raise an error if the provided entity vector is not of the correct length
|
||||||
if len(entity_vector) != self.entity_vector_length:
|
if len(entity_vector) != self.entity_vector_length:
|
||||||
# TODO: proper error
|
raise ValueError(Errors.E141.format(found=len(entity_vector), required=self.entity_vector_length))
|
||||||
raise ValueError("Entity vector length should have been", self.entity_vector_length)
|
|
||||||
|
|
||||||
vector_index = self.c_add_vector(entity_vector=entity_vector)
|
vector_index = self.c_add_vector(entity_vector=entity_vector)
|
||||||
|
|
||||||
|
@ -121,13 +121,8 @@ cdef class KnowledgeBase:
|
||||||
return entity_hash
|
return entity_hash
|
||||||
|
|
||||||
cpdef set_entities(self, entity_list, prob_list, vector_list):
|
cpdef set_entities(self, entity_list, prob_list, vector_list):
|
||||||
if len(entity_list) != len(prob_list):
|
if len(entity_list) != len(prob_list) or len(entity_list) != len(vector_list):
|
||||||
# TODO: proper error
|
raise ValueError(Errors.E140)
|
||||||
raise ValueError("Entity list and prob list should have the same length")
|
|
||||||
|
|
||||||
if len(entity_list) != len(vector_list):
|
|
||||||
# TODO: proper error
|
|
||||||
raise ValueError("Entity list and vector list should have the same length")
|
|
||||||
|
|
||||||
nr_entities = len(entity_list)
|
nr_entities = len(entity_list)
|
||||||
self._entry_index = PreshMap(nr_entities+1)
|
self._entry_index = PreshMap(nr_entities+1)
|
||||||
|
@ -138,8 +133,7 @@ cdef class KnowledgeBase:
|
||||||
while i < nr_entities:
|
while i < nr_entities:
|
||||||
entity_vector = vector_list[i]
|
entity_vector = vector_list[i]
|
||||||
if len(entity_vector) != self.entity_vector_length:
|
if len(entity_vector) != self.entity_vector_length:
|
||||||
# TODO: proper error
|
raise ValueError(Errors.E141.format(found=len(entity_vector), required=self.entity_vector_length))
|
||||||
raise ValueError("Entity vector is", len(entity_vector), "length but should have been", self.entity_vector_length)
|
|
||||||
|
|
||||||
entity_hash = self.vocab.strings.add(entity_list[i])
|
entity_hash = self.vocab.strings.add(entity_list[i])
|
||||||
entry.entity_hash = entity_hash
|
entry.entity_hash = entity_hash
|
||||||
|
|
|
@ -1002,7 +1002,7 @@ cdef class DependencyParser(Parser):
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def postprocesses(self):
|
def postprocesses(self):
|
||||||
return [nonproj.deprojectivize, merge_subtokens]
|
return [nonproj.deprojectivize] # , merge_subtokens]
|
||||||
|
|
||||||
def add_multitask_objective(self, target):
|
def add_multitask_objective(self, target):
|
||||||
if target == "cloze":
|
if target == "cloze":
|
||||||
|
@ -1100,8 +1100,7 @@ class EntityLinker(Pipe):
|
||||||
def require_kb(self):
|
def require_kb(self):
|
||||||
# Raise an error if the knowledge base is not initialized.
|
# Raise an error if the knowledge base is not initialized.
|
||||||
if getattr(self, "kb", None) in (None, True, False):
|
if getattr(self, "kb", None) in (None, True, False):
|
||||||
# TODO: custom error
|
raise ValueError(Errors.E139.format(name=self.name))
|
||||||
raise ValueError(Errors.E109.format(name=self.name))
|
|
||||||
|
|
||||||
def begin_training(self, get_gold_tuples=lambda: [], pipeline=None, sgd=None, **kwargs):
|
def begin_training(self, get_gold_tuples=lambda: [], pipeline=None, sgd=None, **kwargs):
|
||||||
self.require_kb()
|
self.require_kb()
|
||||||
|
|
Loading…
Reference in New Issue
Block a user