mirror of
https://github.com/explosion/spaCy.git
synced 2025-05-03 07:13:40 +03:00
allow small rounding errors
This commit is contained in:
parent
3629a52ede
commit
1ae41daaa9
|
@ -61,13 +61,13 @@ def create_kb(vocab, max_entities_per_alias, min_occ, to_print=False):
|
||||||
entity_frequencies = _get_entity_frequencies(entities=title_list)
|
entity_frequencies = _get_entity_frequencies(entities=title_list)
|
||||||
|
|
||||||
print()
|
print()
|
||||||
print("3. _add_entities", datetime.datetime.now())
|
print("3. adding", len(entity_list), "entities", datetime.datetime.now())
|
||||||
print()
|
print()
|
||||||
kb.set_entities(entity_list=entity_list, prob_list=entity_frequencies, vector_list=None, feature_list=None)
|
kb.set_entities(entity_list=entity_list, prob_list=entity_frequencies, vector_list=None, feature_list=None)
|
||||||
# _add_entities(kb, entities=entity_list, probs=entity_frequencies, to_print=to_print)
|
# _add_entities(kb, entities=entity_list, probs=entity_frequencies, to_print=to_print)
|
||||||
|
|
||||||
print()
|
print()
|
||||||
print("4. _add_aliases", datetime.datetime.now())
|
print("4. adding aliases", datetime.datetime.now())
|
||||||
print()
|
print()
|
||||||
_add_aliases(kb, title_to_id=title_to_id, max_entities_per_alias=max_entities_per_alias, min_occ=min_occ,)
|
_add_aliases(kb, title_to_id=title_to_id, max_entities_per_alias=max_entities_per_alias, min_occ=min_occ,)
|
||||||
|
|
||||||
|
@ -171,7 +171,10 @@ def _add_aliases(kb, title_to_id, max_entities_per_alias, min_occ, to_print=Fals
|
||||||
prior_probs.append(p_entity_givenalias)
|
prior_probs.append(p_entity_givenalias)
|
||||||
|
|
||||||
if selected_entities:
|
if selected_entities:
|
||||||
kb.add_alias(alias=previous_alias, entities=selected_entities, probabilities=prior_probs)
|
try:
|
||||||
|
kb.add_alias(alias=previous_alias, entities=selected_entities, probabilities=prior_probs)
|
||||||
|
except ValueError as e:
|
||||||
|
print(e)
|
||||||
total_count = 0
|
total_count = 0
|
||||||
counts = list()
|
counts = list()
|
||||||
entities = list()
|
entities = list()
|
||||||
|
|
|
@ -179,9 +179,9 @@ cdef class KnowledgeBase:
|
||||||
entities_length=len(entities),
|
entities_length=len(entities),
|
||||||
probabilities_length=len(probabilities)))
|
probabilities_length=len(probabilities)))
|
||||||
|
|
||||||
# Throw an error if the probabilities sum up to more than 1
|
# Throw an error if the probabilities sum up to more than 1 (allow for some rounding errors)
|
||||||
prob_sum = sum(probabilities)
|
prob_sum = sum(probabilities)
|
||||||
if prob_sum > 1:
|
if prob_sum > 1.00001:
|
||||||
raise ValueError(Errors.E133.format(alias=alias, sum=prob_sum))
|
raise ValueError(Errors.E133.format(alias=alias, sum=prob_sum))
|
||||||
|
|
||||||
cdef hash_t alias_hash = self.vocab.strings.add(alias)
|
cdef hash_t alias_hash = self.vocab.strings.add(alias)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user