mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-13 10:46:29 +03:00
speeding up training
This commit is contained in:
parent
66813a1fdc
commit
6521cfa132
|
@ -115,6 +115,7 @@ def run_pipeline():
|
|||
|
||||
# STEP 6: create the entity linking pipe
|
||||
if train_pipe:
|
||||
print("STEP 6: training Entity Linking pipe", datetime.datetime.now())
|
||||
train_limit = 100
|
||||
dev_limit = 20
|
||||
print("Training on", train_limit, "articles")
|
||||
|
@ -147,6 +148,7 @@ def run_pipeline():
|
|||
|
||||
with nlp.disable_pipes(*other_pipes):
|
||||
for batch in batches:
|
||||
try:
|
||||
docs, golds = zip(*batch)
|
||||
nlp.update(
|
||||
docs,
|
||||
|
@ -154,6 +156,16 @@ def run_pipeline():
|
|||
drop=DROPOUT,
|
||||
losses=losses,
|
||||
)
|
||||
except Exception as e:
|
||||
print("Error updating batch", e)
|
||||
|
||||
print("Epoch, train loss", itn, round(losses['entity_linker'], 2))
|
||||
|
||||
# baseline using only prior probabilities
|
||||
el_pipe.context_weight = 0
|
||||
el_pipe.prior_weight = 1
|
||||
dev_acc_0_1 = _measure_accuracy(dev_data, el_pipe)
|
||||
train_acc_0_1 = _measure_accuracy(train_data, el_pipe)
|
||||
|
||||
# print(" measuring accuracy 1-1")
|
||||
el_pipe.context_weight = 1
|
||||
|
@ -161,19 +173,13 @@ def run_pipeline():
|
|||
dev_acc_1_1 = _measure_accuracy(dev_data, el_pipe)
|
||||
train_acc_1_1 = _measure_accuracy(train_data, el_pipe)
|
||||
|
||||
# print(" measuring accuracy 0-1")
|
||||
el_pipe.context_weight = 0
|
||||
el_pipe.prior_weight = 1
|
||||
dev_acc_0_1 = _measure_accuracy(dev_data, el_pipe)
|
||||
train_acc_0_1 = _measure_accuracy(train_data, el_pipe)
|
||||
|
||||
# print(" measuring accuracy 1-0")
|
||||
el_pipe.context_weight = 1
|
||||
el_pipe.prior_weight = 0
|
||||
dev_acc_1_0 = _measure_accuracy(dev_data, el_pipe)
|
||||
train_acc_1_0 = _measure_accuracy(train_data, el_pipe)
|
||||
|
||||
print("Epoch, train loss, train/dev acc, 1-1, 0-1, 1-0:", itn, round(losses['entity_linker'], 2),
|
||||
print("train/dev acc, 1-1, 0-1, 1-0:" ,
|
||||
round(train_acc_1_1, 2), round(train_acc_0_1, 2), round(train_acc_1_0, 2), "/",
|
||||
round(dev_acc_1_1, 2), round(dev_acc_0_1, 2), round(dev_acc_1_0, 2))
|
||||
|
||||
|
@ -193,10 +199,10 @@ def _measure_accuracy(data, el_pipe):
|
|||
|
||||
docs = [d for d, g in data]
|
||||
docs = el_pipe.pipe(docs)
|
||||
|
||||
golds = [g for d, g in data]
|
||||
|
||||
for doc, gold in zip(docs, golds):
|
||||
try:
|
||||
correct_entries_per_article = dict()
|
||||
for entity in gold.links:
|
||||
start, end, gold_kb = entity
|
||||
|
@ -214,6 +220,9 @@ def _measure_accuracy(data, el_pipe):
|
|||
else:
|
||||
incorrect += 1
|
||||
|
||||
except Exception as e:
|
||||
print("Error assessing accuracy", e)
|
||||
|
||||
if correct == incorrect == 0:
|
||||
return 0
|
||||
|
||||
|
|
|
@ -1220,8 +1220,13 @@ class EntityLinker(Pipe):
|
|||
|
||||
def predict(self, docs):
|
||||
self.require_model()
|
||||
|
||||
if isinstance(docs, Doc):
|
||||
docs = [docs]
|
||||
|
||||
final_entities = list()
|
||||
final_kb_ids = list()
|
||||
|
||||
for i, article_doc in enumerate(docs):
|
||||
doc_encoding = self.article_encoder([article_doc])
|
||||
for ent in article_doc.ents:
|
||||
|
|
Loading…
Reference in New Issue
Block a user