different architecture / settings

This commit is contained in:
svlandeg 2019-05-14 08:37:52 +02:00
parent 4142e8dd1b
commit 09ed446b20
2 changed files with 22 additions and 23 deletions

View File

@@ -4,18 +4,17 @@ from __future__ import unicode_literals
import os
import datetime
from os import listdir
import numpy as np
from random import shuffle
from examples.pipeline.wiki_entity_linking import run_el, training_set_creator, kb_creator
from spacy._ml import SpacyVectors, create_default_optimizer, zero_init
from thinc.api import chain, flatten_add_lengths, with_getitem, clone, with_flatten
from thinc.v2v import Model, Maxout, Softmax, Affine, ReLu
from thinc.api import chain, flatten_add_lengths, with_getitem, clone
from thinc.v2v import Model, Softmax, Maxout, Affine, ReLu
from thinc.t2v import Pooling, sum_pool, mean_pool
from thinc.t2t import ExtractWindow, ParametricAttention
from thinc.misc import Residual, LayerNorm as LN
from thinc.t2t import ParametricAttention
from thinc.misc import Residual
from spacy.tokens import Doc
@@ -35,18 +34,20 @@ class EL_Model():
self.entity_encoder = self._simple_encoder(in_width=300, out_width=96)
self.article_encoder = self._simple_encoder(in_width=300, out_width=96)
def train_model(self, training_dir, entity_descr_output, limit=None, to_print=True):
def train_model(self, training_dir, entity_descr_output, trainlimit=None, devlimit=None, to_print=True):
Doc.set_extension("entity_id", default=None)
train_instances, train_pos, train_neg, train_doc = self._get_training_data(training_dir,
entity_descr_output,
False,
limit, to_print)
trainlimit,
to_print)
dev_instances, dev_pos, dev_neg, dev_doc = self._get_training_data(training_dir,
entity_descr_output,
True,
limit / 10, to_print)
devlimit,
to_print)
if to_print:
print("Training on", len(train_instances.values()), "articles")
@@ -78,7 +79,6 @@ class EL_Model():
if to_print:
print("Trained on", instance_count, "instance clusters")
def _test_dev(self, dev_instances, dev_pos, dev_neg, dev_doc):
predictions = list()
golds = list()
@@ -129,19 +129,19 @@ class EL_Model():
conv_depth = 1
cnn_maxout_pieces = 3
with Model.define_operators({">>": chain, "**": clone}):
# encoder = SpacyVectors \
# >> flatten_add_lengths \
# >> ParametricAttention(in_width)\
# >> Pooling(mean_pool) \
# >> Residual(zero_init(Maxout(in_width, in_width))) \
# >> zero_init(Affine(out_width, in_width, drop_factor=0.0))
encoder = SpacyVectors \
>> flatten_add_lengths \
>> with_getitem(0, Affine(in_width, in_width)) \
>> ParametricAttention(in_width) \
>> Pooling(sum_pool) \
>> Residual(ReLu(in_width, in_width)) ** conv_depth \
>> zero_init(Affine(out_width, in_width, drop_factor=0.0))
>> flatten_add_lengths \
>> ParametricAttention(in_width)\
>> Pooling(mean_pool) \
>> Residual(zero_init(Maxout(in_width, in_width))) \
>> zero_init(Affine(out_width, in_width, drop_factor=0.0))
# encoder = SpacyVectors \
# >> flatten_add_lengths \
# >> with_getitem(0, Affine(in_width, in_width)) \
# >> ParametricAttention(in_width) \
# >> Pooling(sum_pool) \
# >> Residual(ReLu(in_width, in_width)) ** conv_depth \
# >> zero_init(Affine(out_width, in_width, drop_factor=0.0))
# >> zero_init(Affine(nr_class, width, drop_factor=0.0))
# >> logistic
@@ -178,7 +178,6 @@ class EL_Model():
# print("encoding dim", len(true_entity_encoding[0]))
consensus_encoding = self._calculate_consensus(doc_encoding, true_entity_encoding)
# consensus_encoding_t = consensus_encoding.transpose()
doc_mse, doc_diff = self._calculate_similarity(doc_encoding, consensus_encoding)

View File

@@ -111,7 +111,7 @@ if __name__ == "__main__":
print("STEP 6: training ", datetime.datetime.now())
my_nlp = spacy.load('en_core_web_md')
trainer = EL_Model(kb=my_kb, nlp=my_nlp)
trainer.train_model(training_dir=TRAINING_DIR, entity_descr_output=ENTITY_DESCR, limit=500)
trainer.train_model(training_dir=TRAINING_DIR, entity_descr_output=ENTITY_DESCR, trainlimit=50, devlimit=50)
print()
# STEP 7: apply the EL algorithm on the dev dataset