diff --git a/bin/wiki_entity_linking/training_set_creator.py b/bin/wiki_entity_linking/training_set_creator.py
index 436154409..5d401bb3f 100644
--- a/bin/wiki_entity_linking/training_set_creator.py
+++ b/bin/wiki_entity_linking/training_set_creator.py
@@ -7,7 +7,7 @@ import bz2
 import datetime
 
 from spacy.gold import GoldParse
-from bin.wiki_entity_linking import kb_creator, wikipedia_processor as wp
+from bin.wiki_entity_linking import kb_creator
 
 """
 Process Wikipedia interlinks to generate a training dataset for the EL algorithm.
@@ -342,8 +342,7 @@ def read_training(nlp, training_dir, dev, limit):
                     # currently feeding the gold data one entity per sentence at a time
                     gold_start = int(start) - found_ent.sent.start_char
                     gold_end = int(end) - found_ent.sent.start_char
-                    gold_entities = []
-                    gold_entities.append((gold_start, gold_end, wp_title))
+                    gold_entities = [(gold_start, gold_end, wp_title)]
                     gold = GoldParse(doc=sent, links=gold_entities)
                     data.append((sent, gold))
                     total_entities += 1
diff --git a/examples/pipeline/wikidata_entity_linking.py b/examples/pipeline/wikidata_entity_linking.py
index c0a7e3c66..d914f033c 100644
--- a/examples/pipeline/wikidata_entity_linking.py
+++ b/examples/pipeline/wikidata_entity_linking.py
@@ -394,10 +394,10 @@ def _measure_baselines(data, kb):
         print("Error assessing accuracy", e)
 
     acc_prior, acc_prior_by_label = calculate_acc(prior_correct_by_label, prior_incorrect_by_label)
-    acc_random, acc_random_by_label = calculate_acc(random_correct_by_label, random_incorrect_by_label)
+    acc_rand, acc_rand_by_label = calculate_acc(random_correct_by_label, random_incorrect_by_label)
     acc_oracle, acc_oracle_by_label = calculate_acc(oracle_correct_by_label, oracle_incorrect_by_label)
 
-    return counts_by_label, acc_random, acc_random_by_label, acc_prior, acc_prior_by_label, acc_oracle, acc_oracle_by_label
+    return counts_by_label, acc_rand, acc_rand_by_label, acc_prior, acc_prior_by_label, acc_oracle, acc_oracle_by_label
 
 
 def calculate_acc(correct_by_label, incorrect_by_label):