mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-10 19:57:17 +03:00
small fixes
This commit is contained in:
parent
2d2dea9924
commit
3420cbe496
|
@ -7,7 +7,7 @@ import bz2
|
|||
import datetime
|
||||
|
||||
from spacy.gold import GoldParse
|
||||
from bin.wiki_entity_linking import kb_creator, wikipedia_processor as wp
|
||||
from bin.wiki_entity_linking import kb_creator
|
||||
|
||||
"""
|
||||
Process Wikipedia interlinks to generate a training dataset for the EL algorithm.
|
||||
|
@ -342,8 +342,7 @@ def read_training(nlp, training_dir, dev, limit):
|
|||
# currently feeding the gold data one entity per sentence at a time
|
||||
gold_start = int(start) - found_ent.sent.start_char
|
||||
gold_end = int(end) - found_ent.sent.start_char
|
||||
gold_entities = []
|
||||
gold_entities.append((gold_start, gold_end, wp_title))
|
||||
gold_entities = [(gold_start, gold_end, wp_title)]
|
||||
gold = GoldParse(doc=sent, links=gold_entities)
|
||||
data.append((sent, gold))
|
||||
total_entities += 1
|
||||
|
|
|
@ -394,10 +394,10 @@ def _measure_baselines(data, kb):
|
|||
print("Error assessing accuracy", e)
|
||||
|
||||
acc_prior, acc_prior_by_label = calculate_acc(prior_correct_by_label, prior_incorrect_by_label)
|
||||
acc_random, acc_random_by_label = calculate_acc(random_correct_by_label, random_incorrect_by_label)
|
||||
acc_rand, acc_rand_by_label = calculate_acc(random_correct_by_label, random_incorrect_by_label)
|
||||
acc_oracle, acc_oracle_by_label = calculate_acc(oracle_correct_by_label, oracle_incorrect_by_label)
|
||||
|
||||
return counts_by_label, acc_random, acc_random_by_label, acc_prior, acc_prior_by_label, acc_oracle, acc_oracle_by_label
|
||||
return counts_by_label, acc_rand, acc_rand_by_label, acc_prior, acc_prior_by_label, acc_oracle, acc_oracle_by_label
|
||||
|
||||
|
||||
def calculate_acc(correct_by_label, incorrect_by_label):
|
||||
|
|
Loading…
Reference in New Issue
Block a user