mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-10 19:57:17 +03:00
hidden with for encoders
This commit is contained in:
parent
7b13e3d56f
commit
eb08bdb11f
|
@ -33,9 +33,10 @@ class EL_Model:
|
|||
CUTOFF = 0.5
|
||||
|
||||
INPUT_DIM = 300
|
||||
HIDDEN_1_WIDTH = 256 # 10
|
||||
HIDDEN_2_WIDTH = 32 # 6
|
||||
ENTITY_WIDTH = 64 # 4
|
||||
ARTICLE_WIDTH = 128 # 8
|
||||
HIDDEN_WIDTH = 64 # 6
|
||||
|
||||
DROP = 0.1
|
||||
|
||||
|
@ -46,7 +47,11 @@ class EL_Model:
|
|||
self.nlp = nlp
|
||||
self.kb = kb
|
||||
|
||||
self._build_cnn(hidden_entity_width=self.ENTITY_WIDTH, hidden_article_width=self.ARTICLE_WIDTH)
|
||||
self._build_cnn(in_width=self.INPUT_DIM,
|
||||
entity_width=self.ENTITY_WIDTH,
|
||||
article_width=self.ARTICLE_WIDTH,
|
||||
hidden_1_width=self.HIDDEN_1_WIDTH,
|
||||
hidden_2_width=self.HIDDEN_2_WIDTH)
|
||||
|
||||
def train_model(self, training_dir, entity_descr_output, trainlimit=None, devlimit=None, to_print=True):
|
||||
# raise errors instead of runtime warnings in case of int/float overflow
|
||||
|
@ -81,9 +86,10 @@ class EL_Model:
|
|||
print()
|
||||
print(" CUTOFF", self.CUTOFF)
|
||||
print(" INPUT_DIM", self.INPUT_DIM)
|
||||
print(" HIDDEN_1_WIDTH", self.HIDDEN_1_WIDTH)
|
||||
print(" ENTITY_WIDTH", self.ENTITY_WIDTH)
|
||||
print(" ARTICLE_WIDTH", self.ARTICLE_WIDTH)
|
||||
print(" HIDDEN_WIDTH", self.ARTICLE_WIDTH)
|
||||
print(" HIDDEN_2_WIDTH", self.HIDDEN_2_WIDTH)
|
||||
print(" DROP", self.DROP)
|
||||
print()
|
||||
|
||||
|
@ -187,34 +193,34 @@ class EL_Model:
|
|||
else:
|
||||
return [float(1.0) if random.uniform(0,1) > self.CUTOFF else float(0.0) for e in entities]
|
||||
|
||||
def _build_cnn(self, hidden_entity_width, hidden_article_width):
|
||||
def _build_cnn(self, in_width, entity_width, article_width, hidden_1_width, hidden_2_width):
|
||||
with Model.define_operators({">>": chain, "|": concatenate, "**": clone}):
|
||||
self.entity_encoder = self._encoder(in_width=self.INPUT_DIM, hidden_width=hidden_entity_width)
|
||||
self.article_encoder = self._encoder(in_width=self.INPUT_DIM, hidden_width=hidden_article_width)
|
||||
self.entity_encoder = self._encoder(in_width=in_width, hidden_with=hidden_1_width, end_width=entity_width)
|
||||
self.article_encoder = self._encoder(in_width=in_width, hidden_with=hidden_1_width, end_width=article_width)
|
||||
|
||||
nr_i = hidden_entity_width + hidden_article_width
|
||||
nr_o = self.HIDDEN_WIDTH
|
||||
in_width = entity_width + article_width
|
||||
out_width = hidden_2_width
|
||||
|
||||
self.model = Affine(nr_o, nr_i) \
|
||||
>> LN(Maxout(nr_o, nr_o)) \
|
||||
>> Affine(1, nr_o) \
|
||||
self.model = Affine(out_width, in_width) \
|
||||
>> LN(Maxout(out_width, out_width)) \
|
||||
>> Affine(1, out_width) \
|
||||
>> logistic
|
||||
|
||||
@staticmethod
|
||||
def _encoder(in_width, hidden_width):
|
||||
def _encoder(in_width, hidden_with, end_width):
|
||||
conv_depth = 2
|
||||
cnn_maxout_pieces = 3
|
||||
|
||||
with Model.define_operators({">>": chain}):
|
||||
convolution = Residual((ExtractWindow(nW=1) >> LN(Maxout(in_width, in_width * 3, pieces=cnn_maxout_pieces))))
|
||||
convolution = Residual((ExtractWindow(nW=1) >> LN(Maxout(hidden_with, hidden_with * 3, pieces=cnn_maxout_pieces))))
|
||||
|
||||
encoder = SpacyVectors \
|
||||
>> with_flatten(LN(Maxout(in_width, in_width)) >> convolution ** conv_depth, pad=conv_depth) \
|
||||
>> with_flatten(LN(Maxout(hidden_with, in_width)) >> convolution ** conv_depth, pad=conv_depth) \
|
||||
>> flatten_add_lengths \
|
||||
>> ParametricAttention(in_width)\
|
||||
>> ParametricAttention(hidden_with)\
|
||||
>> Pooling(mean_pool) \
|
||||
>> Residual(zero_init(Maxout(in_width, in_width))) \
|
||||
>> zero_init(Affine(hidden_width, in_width, drop_factor=0.0))
|
||||
>> Residual(zero_init(Maxout(hidden_with, hidden_with))) \
|
||||
>> zero_init(Affine(end_width, hidden_with, drop_factor=0.0))
|
||||
|
||||
# TODO: ReLu or LN(Maxout) ?
|
||||
# sum_pool or mean_pool ?
|
||||
|
|
|
@ -111,7 +111,7 @@ if __name__ == "__main__":
|
|||
print("STEP 6: training", datetime.datetime.now())
|
||||
my_nlp = spacy.load('en_core_web_md')
|
||||
trainer = EL_Model(kb=my_kb, nlp=my_nlp)
|
||||
trainer.train_model(training_dir=TRAINING_DIR, entity_descr_output=ENTITY_DESCR, trainlimit=500, devlimit=20)
|
||||
trainer.train_model(training_dir=TRAINING_DIR, entity_descr_output=ENTITY_DESCR, trainlimit=1000, devlimit=20)
|
||||
print()
|
||||
|
||||
# STEP 7: apply the EL algorithm on the dev dataset
|
||||
|
|
Loading…
Reference in New Issue
Block a user