hidden width for encoders

svlandeg 2019-05-21 23:42:46 +02:00
parent 7b13e3d56f
commit eb08bdb11f
2 changed files with 26 additions and 20 deletions


@@ -33,9 +33,10 @@ class EL_Model:
     CUTOFF = 0.5
     INPUT_DIM = 300
+    HIDDEN_1_WIDTH = 256  # 10
+    HIDDEN_2_WIDTH = 32  # 6
     ENTITY_WIDTH = 64  # 4
     ARTICLE_WIDTH = 128  # 8
-    HIDDEN_WIDTH = 64  # 6
     DROP = 0.1
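
For orientation, here is where each constant now flows (a hypothetical summary, not part of the commit): HIDDEN_1_WIDTH becomes the shared internal width of both encoders, which previously ran at the full input width; ENTITY_WIDTH and ARTICLE_WIDTH remain the encoder output sizes; and HIDDEN_2_WIDTH is the hidden layer of the pair classifier, replacing the old single HIDDEN_WIDTH.

    # Hypothetical width bookkeeping implied by the constants above (not in the diff).
    INPUT_DIM = 300        # en_core_web_md vector size, input to both encoders
    HIDDEN_1_WIDTH = 256   # internal width of the encoder CNN stack
    ENTITY_WIDTH = 64      # entity encoder output width
    ARTICLE_WIDTH = 128    # article encoder output width
    HIDDEN_2_WIDTH = 32    # hidden width of the pair classifier

    classifier_in = ENTITY_WIDTH + ARTICLE_WIDTH  # 64 + 128 = 192
    assert classifier_in == 192                   # feeds Affine(HIDDEN_2_WIDTH, classifier_in)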
@@ -46,7 +47,11 @@ class EL_Model:
         self.nlp = nlp
         self.kb = kb
-        self._build_cnn(hidden_entity_width=self.ENTITY_WIDTH, hidden_article_width=self.ARTICLE_WIDTH)
+        self._build_cnn(in_width=self.INPUT_DIM,
+                        entity_width=self.ENTITY_WIDTH,
+                        article_width=self.ARTICLE_WIDTH,
+                        hidden_1_width=self.HIDDEN_1_WIDTH,
+                        hidden_2_width=self.HIDDEN_2_WIDTH)

     def train_model(self, training_dir, entity_descr_output, trainlimit=None, devlimit=None, to_print=True):
         # raise errors instead of runtime warnings in case of int/float overflow
@@ -81,9 +86,10 @@ class EL_Model:
         print()
         print(" CUTOFF", self.CUTOFF)
         print(" INPUT_DIM", self.INPUT_DIM)
+        print(" HIDDEN_1_WIDTH", self.HIDDEN_1_WIDTH)
         print(" ENTITY_WIDTH", self.ENTITY_WIDTH)
         print(" ARTICLE_WIDTH", self.ARTICLE_WIDTH)
-        print(" HIDDEN_WIDTH", self.ARTICLE_WIDTH)
+        print(" HIDDEN_2_WIDTH", self.HIDDEN_2_WIDTH)
         print(" DROP", self.DROP)
         print()
@@ -187,34 +193,34 @@ class EL_Model:
         else:
             return [float(1.0) if random.uniform(0,1) > self.CUTOFF else float(0.0) for e in entities]

-    def _build_cnn(self, hidden_entity_width, hidden_article_width):
+    def _build_cnn(self, in_width, entity_width, article_width, hidden_1_width, hidden_2_width):
         with Model.define_operators({">>": chain, "|": concatenate, "**": clone}):
-            self.entity_encoder = self._encoder(in_width=self.INPUT_DIM, hidden_width=hidden_entity_width)
-            self.article_encoder = self._encoder(in_width=self.INPUT_DIM, hidden_width=hidden_article_width)
+            self.entity_encoder = self._encoder(in_width=in_width, hidden_width=hidden_1_width, end_width=entity_width)
+            self.article_encoder = self._encoder(in_width=in_width, hidden_width=hidden_1_width, end_width=article_width)

-            nr_i = hidden_entity_width + hidden_article_width
-            nr_o = self.HIDDEN_WIDTH
+            in_width = entity_width + article_width
+            out_width = hidden_2_width

-            self.model = Affine(nr_o, nr_i) \
-                >> LN(Maxout(nr_o, nr_o)) \
-                >> Affine(1, nr_o) \
+            self.model = Affine(out_width, in_width) \
+                >> LN(Maxout(out_width, out_width)) \
+                >> Affine(1, out_width) \
                 >> logistic

     @staticmethod
-    def _encoder(in_width, hidden_width):
+    def _encoder(in_width, hidden_width, end_width):
         conv_depth = 2
         cnn_maxout_pieces = 3

         with Model.define_operators({">>": chain}):
-            convolution = Residual((ExtractWindow(nW=1) >> LN(Maxout(in_width, in_width * 3, pieces=cnn_maxout_pieces))))
+            convolution = Residual((ExtractWindow(nW=1) >> LN(Maxout(hidden_width, hidden_width * 3, pieces=cnn_maxout_pieces))))

             encoder = SpacyVectors \
-                >> with_flatten(LN(Maxout(in_width, in_width)) >> convolution ** conv_depth, pad=conv_depth) \
+                >> with_flatten(LN(Maxout(hidden_width, in_width)) >> convolution ** conv_depth, pad=conv_depth) \
                 >> flatten_add_lengths \
-                >> ParametricAttention(in_width) \
+                >> ParametricAttention(hidden_width) \
                 >> Pooling(mean_pool) \
-                >> Residual(zero_init(Maxout(in_width, in_width))) \
-                >> zero_init(Affine(hidden_width, in_width, drop_factor=0.0))
+                >> Residual(zero_init(Maxout(hidden_width, hidden_width))) \
+                >> zero_init(Affine(end_width, hidden_width, drop_factor=0.0))

         # TODO: ReLu or LN(Maxout) ?
         # sum_pool or mean_pool ?
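
To check the end-to-end shapes, here is a minimal numpy sketch of the dimension flow through the new stack (random weights and hypothetical names; the real layers are thinc's Maxout/attention/Residual combinators, approximated here by plain matmuls and a mean over tokens):

    import numpy as np

    rng = np.random.default_rng(0)

    def encode(n_tokens, in_width, hidden_width, end_width):
        tokens = rng.standard_normal((n_tokens, in_width))               # SpacyVectors output
        hidden = tokens @ rng.standard_normal((in_width, hidden_width))  # Maxout(hidden_width, in_width)
        pooled = hidden.mean(axis=0)                                     # Pooling(mean_pool)
        return pooled @ rng.standard_normal((hidden_width, end_width))   # Affine(end_width, hidden_width)

    entity = encode(15, 300, 256, 64)     # entity encoder -> ENTITY_WIDTH
    article = encode(500, 300, 256, 128)  # article encoder -> ARTICLE_WIDTH

    pair = np.concatenate([entity, article])        # 64 + 128 = 192
    hidden = pair @ rng.standard_normal((192, 32))  # Affine(out_width, in_width)
    score = 1 / (1 + np.exp(-(hidden @ rng.standard_normal(32))))  # Affine(1, out_width) >> logistic
    print(round(float(score), 3))                   # one probability per entity/article pair

The only place the two encoders differ is end_width; the shared hidden_width (HIDDEN_1_WIDTH) is what this commit threads through explicitly.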


@@ -111,7 +111,7 @@ if __name__ == "__main__":
     print("STEP 6: training", datetime.datetime.now())
     my_nlp = spacy.load('en_core_web_md')
     trainer = EL_Model(kb=my_kb, nlp=my_nlp)
-    trainer.train_model(training_dir=TRAINING_DIR, entity_descr_output=ENTITY_DESCR, trainlimit=500, devlimit=20)
+    trainer.train_model(training_dir=TRAINING_DIR, entity_descr_output=ENTITY_DESCR, trainlimit=1000, devlimit=20)
     print()

     # STEP 7: apply the EL algorithm on the dev dataset