mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-28 18:24:08 +03:00
small fixes
This commit is contained in:
parent
6521cfa132
commit
b12001f368
|
@ -116,8 +116,8 @@ def run_pipeline():
|
||||||
# STEP 6: create the entity linking pipe
|
# STEP 6: create the entity linking pipe
|
||||||
if train_pipe:
|
if train_pipe:
|
||||||
print("STEP 6: training Entity Linking pipe", datetime.datetime.now())
|
print("STEP 6: training Entity Linking pipe", datetime.datetime.now())
|
||||||
train_limit = 100
|
train_limit = 5000
|
||||||
dev_limit = 20
|
dev_limit = 1000
|
||||||
print("Training on", train_limit, "articles")
|
print("Training on", train_limit, "articles")
|
||||||
print("Dev testing on", dev_limit, "articles")
|
print("Dev testing on", dev_limit, "articles")
|
||||||
print()
|
print()
|
||||||
|
@ -145,6 +145,7 @@ def run_pipeline():
|
||||||
random.shuffle(train_data)
|
random.shuffle(train_data)
|
||||||
losses = {}
|
losses = {}
|
||||||
batches = minibatch(train_data, size=compounding(4.0, 128.0, 1.001))
|
batches = minibatch(train_data, size=compounding(4.0, 128.0, 1.001))
|
||||||
|
batchnr = 0
|
||||||
|
|
||||||
with nlp.disable_pipes(*other_pipes):
|
with nlp.disable_pipes(*other_pipes):
|
||||||
for batch in batches:
|
for batch in batches:
|
||||||
|
@ -156,35 +157,43 @@ def run_pipeline():
|
||||||
drop=DROPOUT,
|
drop=DROPOUT,
|
||||||
losses=losses,
|
losses=losses,
|
||||||
)
|
)
|
||||||
|
batchnr += 1
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
print("Error updating batch", e)
|
print("Error updating batch", e)
|
||||||
|
|
||||||
|
losses['entity_linker'] = losses['entity_linker'] / batchnr
|
||||||
print("Epoch, train loss", itn, round(losses['entity_linker'], 2))
|
print("Epoch, train loss", itn, round(losses['entity_linker'], 2))
|
||||||
|
|
||||||
# baseline using only prior probabilities
|
print()
|
||||||
el_pipe.context_weight = 0
|
print("STEP 7: performance measurement of Entity Linking pipe", datetime.datetime.now())
|
||||||
el_pipe.prior_weight = 1
|
print()
|
||||||
dev_acc_0_1 = _measure_accuracy(dev_data, el_pipe)
|
|
||||||
train_acc_0_1 = _measure_accuracy(train_data, el_pipe)
|
|
||||||
|
|
||||||
# print(" measuring accuracy 1-1")
|
# print(" measuring accuracy 1-1")
|
||||||
el_pipe.context_weight = 1
|
el_pipe.context_weight = 1
|
||||||
el_pipe.prior_weight = 1
|
el_pipe.prior_weight = 1
|
||||||
dev_acc_1_1 = _measure_accuracy(dev_data, el_pipe)
|
dev_acc_1_1 = _measure_accuracy(dev_data, el_pipe)
|
||||||
train_acc_1_1 = _measure_accuracy(train_data, el_pipe)
|
train_acc_1_1 = _measure_accuracy(train_data, el_pipe)
|
||||||
|
print("train/dev acc combo:", round(train_acc_1_1, 2), round(dev_acc_1_1, 2))
|
||||||
|
|
||||||
# print(" measuring accuracy 1-0")
|
# baseline using only prior probabilities
|
||||||
|
el_pipe.context_weight = 0
|
||||||
|
el_pipe.prior_weight = 1
|
||||||
|
dev_acc_0_1 = _measure_accuracy(dev_data, el_pipe)
|
||||||
|
train_acc_0_1 = _measure_accuracy(train_data, el_pipe)
|
||||||
|
print("train/dev acc prior:", round(train_acc_0_1, 2), round(dev_acc_0_1, 2))
|
||||||
|
|
||||||
|
# using only context
|
||||||
el_pipe.context_weight = 1
|
el_pipe.context_weight = 1
|
||||||
el_pipe.prior_weight = 0
|
el_pipe.prior_weight = 0
|
||||||
dev_acc_1_0 = _measure_accuracy(dev_data, el_pipe)
|
dev_acc_1_0 = _measure_accuracy(dev_data, el_pipe)
|
||||||
train_acc_1_0 = _measure_accuracy(train_data, el_pipe)
|
train_acc_1_0 = _measure_accuracy(train_data, el_pipe)
|
||||||
|
|
||||||
print("train/dev acc, 1-1, 0-1, 1-0:" ,
|
print("train/dev acc context:", round(train_acc_1_0, 2), round(dev_acc_1_0, 2))
|
||||||
round(train_acc_1_1, 2), round(train_acc_0_1, 2), round(train_acc_1_0, 2), "/",
|
print()
|
||||||
round(dev_acc_1_1, 2), round(dev_acc_0_1, 2), round(dev_acc_1_0, 2))
|
|
||||||
|
|
||||||
# test Entity Linker
|
|
||||||
if to_test_pipeline:
|
if to_test_pipeline:
|
||||||
|
print()
|
||||||
|
print("STEP 8: applying Entity Linking to toy example", datetime.datetime.now())
|
||||||
print()
|
print()
|
||||||
run_el_toy_example(kb=my_kb, nlp=nlp)
|
run_el_toy_example(kb=my_kb, nlp=nlp)
|
||||||
print()
|
print()
|
||||||
|
@ -197,9 +206,9 @@ def _measure_accuracy(data, el_pipe):
|
||||||
correct = 0
|
correct = 0
|
||||||
incorrect = 0
|
incorrect = 0
|
||||||
|
|
||||||
docs = [d for d, g in data]
|
docs = [d for d, g in data if len(d) > 0]
|
||||||
docs = el_pipe.pipe(docs)
|
docs = el_pipe.pipe(docs)
|
||||||
golds = [g for d, g in data]
|
golds = [g for d, g in data if len(d) > 0]
|
||||||
|
|
||||||
for doc, gold in zip(docs, golds):
|
for doc, gold in zip(docs, golds):
|
||||||
try:
|
try:
|
||||||
|
|
|
@ -1188,7 +1188,7 @@ class EntityLinker(Pipe):
|
||||||
def get_loss(self, docs, golds, scores):
|
def get_loss(self, docs, golds, scores):
|
||||||
targets = [[1] for _ in golds] # assuming we're only using positive examples
|
targets = [[1] for _ in golds] # assuming we're only using positive examples
|
||||||
loss, gradients = self.get_cossim_loss_2(yh=scores, y=golds, t=targets)
|
loss, gradients = self.get_cossim_loss_2(yh=scores, y=golds, t=targets)
|
||||||
#loss = loss / len(golds)
|
loss = loss / len(golds)
|
||||||
return loss, gradients
|
return loss, gradients
|
||||||
|
|
||||||
def get_cossim_loss_2(self, yh, y, t):
|
def get_cossim_loss_2(self, yh, y, t):
|
||||||
|
|
Loading…
Reference in New Issue
Block a user