small fixes

2025-10-29 23:17:59 +03:00 · 2019-07-03 10:25:51 +02:00 · 2019-07-03 10:25:51 +02:00 · 3420cbe496
commit 3420cbe496
parent 2d2dea9924
2 changed files with 4 additions and 5 deletions
--- a/bin/wiki_entity_linking/training_set_creator.py
+++ b/bin/wiki_entity_linking/training_set_creator.py
@@ -7,7 +7,7 @@ import bz2
 import datetime

 from spacy.gold import GoldParse
-from bin.wiki_entity_linking import kb_creator, wikipedia_processor as wp
+from bin.wiki_entity_linking import kb_creator

 """
 Process Wikipedia interlinks to generate a training dataset for the EL algorithm.
@@ -342,8 +342,7 @@ def read_training(nlp, training_dir, dev, limit):
                                # currently feeding the gold data one entity per sentence at a time
                                gold_start = int(start) - found_ent.sent.start_char
                                gold_end = int(end) - found_ent.sent.start_char
-                                gold_entities = []
-                                gold_entities.append((gold_start, gold_end, wp_title))
+                                gold_entities = [(gold_start, gold_end, wp_title)]
                                gold = GoldParse(doc=sent, links=gold_entities)
                                data.append((sent, gold))
                                total_entities += 1
--- a/examples/pipeline/wikidata_entity_linking.py
+++ b/examples/pipeline/wikidata_entity_linking.py
@@ -394,10 +394,10 @@ def _measure_baselines(data, kb):
            print("Error assessing accuracy", e)

    acc_prior, acc_prior_by_label = calculate_acc(prior_correct_by_label, prior_incorrect_by_label)
-    acc_random, acc_random_by_label = calculate_acc(random_correct_by_label, random_incorrect_by_label)
+    acc_rand, acc_rand_by_label = calculate_acc(random_correct_by_label, random_incorrect_by_label)
    acc_oracle, acc_oracle_by_label = calculate_acc(oracle_correct_by_label, oracle_incorrect_by_label)

-    return counts_by_label, acc_random, acc_random_by_label, acc_prior, acc_prior_by_label, acc_oracle, acc_oracle_by_label
+    return counts_by_label, acc_rand, acc_rand_by_label, acc_prior, acc_prior_by_label, acc_oracle, acc_oracle_by_label


 def calculate_acc(correct_by_label, incorrect_by_label):