error and warning messages

This commit is contained in:
svlandeg 2019-03-22 16:55:05 +01:00
parent 9de9900510
commit 46f4eb5db3
2 changed files with 15 additions and 10 deletions

View File

@ -80,6 +80,8 @@ class Warnings(object):
"the v2.x models cannot release the global interpreter lock. " "the v2.x models cannot release the global interpreter lock. "
"Future versions may introduce a `n_process` argument for " "Future versions may introduce a `n_process` argument for "
"parallel inference via multiprocessing.") "parallel inference via multiprocessing.")
W017 = ("Alias '{alias}' already exists in the Knowledge base.")
W018 = ("Entity '{entity}' already exists in the Knowledge base.")
@add_codes @add_codes
@ -376,6 +378,11 @@ class Errors(object):
"Instead, create a new Span object and specify the `kb_id` keyword argument, " "Instead, create a new Span object and specify the `kb_id` keyword argument, "
"for example:\nfrom spacy.tokens import Span\n" "for example:\nfrom spacy.tokens import Span\n"
"span = Span(doc, start={start}, end={end}, label='{label}', kb_id='{kb_id}')") "span = Span(doc, start={start}, end={end}, label='{label}', kb_id='{kb_id}')")
E132 = ("The vectors for entities and probabilities for alias '{alias}' should have equal length, "
"but found {entities_length} and {probabilities_length} respectively.")
E133 = ("The sum of prior probabilities for alias '{alias}' should not exceed 1, "
"but found {sum}.")
E134 = ("Alias '{alias}' defined for unknown entity '{entity}'.")
@add_codes @add_codes

View File

@ -1,6 +1,6 @@
# cython: profile=True # cython: profile=True
# coding: utf8 # coding: utf8
from spacy.errors import user_warning from spacy.errors import Errors, Warnings, user_warning
cdef class Candidate: cdef class Candidate:
@ -77,7 +77,7 @@ cdef class KnowledgeBase:
# Return if this entity was added before # Return if this entity was added before
if id_hash in self._entry_index: if id_hash in self._entry_index:
user_warning("Entity " + entity_id + " already exists in the KB") user_warning(Warnings.W018.format(entity=entity_id))
return return
cdef int32_t dummy_value = 342 cdef int32_t dummy_value = 342
@ -96,22 +96,20 @@ cdef class KnowledgeBase:
# Throw an error if the lengths of entities and probabilities are not the same # Throw an error if the lengths of entities and probabilities are not the same
if not len(entities) == len(probabilities): if not len(entities) == len(probabilities):
raise ValueError("The vectors for entities and probabilities for alias '" + alias raise ValueError(Errors.E132.format(alias=alias,
+ "' should have equal length, but found " entities_length=len(entities),
+ str(len(entities)) + " and " + str(len(probabilities)) + "respectively.") probabilities_length=len(probabilities)))
# Throw an error if the probabilities sum up to more than 1 # Throw an error if the probabilities sum up to more than 1
prob_sum = sum(probabilities) prob_sum = sum(probabilities)
if prob_sum > 1: if prob_sum > 1:
raise ValueError("The sum of prior probabilities for alias '" + alias + "' should not exceed 1, " raise ValueError(Errors.E133.format(alias=alias, sum=prob_sum))
+ "but found " + str(prob_sum))
cdef hash_t alias_hash = self.vocab.strings.add(alias) cdef hash_t alias_hash = self.vocab.strings.add(alias)
# Return if this alias was added before # Return if this alias was added before
if alias_hash in self._alias_index: if alias_hash in self._alias_index:
user_warning("Alias " + alias + " already exists in the KB") user_warning(Warnings.W017.format(alias=alias))
return return
cdef hash_t entity_hash cdef hash_t entity_hash
@ -122,7 +120,7 @@ cdef class KnowledgeBase:
for entity, prob in zip(entities, probabilities): for entity, prob in zip(entities, probabilities):
entity_id_hash = self.vocab.strings[entity] entity_id_hash = self.vocab.strings[entity]
if not entity_id_hash in self._entry_index: if not entity_id_hash in self._entry_index:
raise ValueError("Alias '" + alias + "' defined for unknown entity '" + entity + "'") raise ValueError(Errors.E134.format(alias=alias, entity=entity))
entry_index = <int64_t>self._entry_index.get(entity_id_hash) entry_index = <int64_t>self._entry_index.get(entity_id_hash)
entry_indices.push_back(int(entry_index)) entry_indices.push_back(int(entry_index))