small fixes

This commit is contained in:
svlandeg 2019-07-03 10:25:51 +02:00
parent 2d2dea9924
commit 3420cbe496
2 changed files with 4 additions and 5 deletions

View File

@@ -7,7 +7,7 @@ import bz2
import datetime
from spacy.gold import GoldParse
from bin.wiki_entity_linking import kb_creator, wikipedia_processor as wp
from bin.wiki_entity_linking import kb_creator
"""
Process Wikipedia interlinks to generate a training dataset for the EL algorithm.
@@ -342,8 +342,7 @@ def read_training(nlp, training_dir, dev, limit):
# currently feeding the gold data one entity per sentence at a time
gold_start = int(start) - found_ent.sent.start_char
gold_end = int(end) - found_ent.sent.start_char
gold_entities = []
gold_entities.append((gold_start, gold_end, wp_title))
gold_entities = [(gold_start, gold_end, wp_title)]
gold = GoldParse(doc=sent, links=gold_entities)
data.append((sent, gold))
total_entities += 1

View File

@@ -394,10 +394,10 @@ def _measure_baselines(data, kb):
print("Error assessing accuracy", e)
acc_prior, acc_prior_by_label = calculate_acc(prior_correct_by_label, prior_incorrect_by_label)
acc_random, acc_random_by_label = calculate_acc(random_correct_by_label, random_incorrect_by_label)
acc_rand, acc_rand_by_label = calculate_acc(random_correct_by_label, random_incorrect_by_label)
acc_oracle, acc_oracle_by_label = calculate_acc(oracle_correct_by_label, oracle_incorrect_by_label)
return counts_by_label, acc_random, acc_random_by_label, acc_prior, acc_prior_by_label, acc_oracle, acc_oracle_by_label
return counts_by_label, acc_rand, acc_rand_by_label, acc_prior, acc_prior_by_label, acc_oracle, acc_oracle_by_label
def calculate_acc(correct_by_label, incorrect_by_label):