mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-10 19:57:17 +03:00
hidden width for encoders
This commit is contained in:
parent
7b13e3d56f
commit
eb08bdb11f
|
@ -33,9 +33,10 @@ class EL_Model:
|
||||||
CUTOFF = 0.5
|
CUTOFF = 0.5
|
||||||
|
|
||||||
INPUT_DIM = 300
|
INPUT_DIM = 300
|
||||||
|
HIDDEN_1_WIDTH = 256 # 10
|
||||||
|
HIDDEN_2_WIDTH = 32 # 6
|
||||||
ENTITY_WIDTH = 64 # 4
|
ENTITY_WIDTH = 64 # 4
|
||||||
ARTICLE_WIDTH = 128 # 8
|
ARTICLE_WIDTH = 128 # 8
|
||||||
HIDDEN_WIDTH = 64 # 6
|
|
||||||
|
|
||||||
DROP = 0.1
|
DROP = 0.1
|
||||||
|
|
||||||
|
@ -46,7 +47,11 @@ class EL_Model:
|
||||||
self.nlp = nlp
|
self.nlp = nlp
|
||||||
self.kb = kb
|
self.kb = kb
|
||||||
|
|
||||||
self._build_cnn(hidden_entity_width=self.ENTITY_WIDTH, hidden_article_width=self.ARTICLE_WIDTH)
|
self._build_cnn(in_width=self.INPUT_DIM,
|
||||||
|
entity_width=self.ENTITY_WIDTH,
|
||||||
|
article_width=self.ARTICLE_WIDTH,
|
||||||
|
hidden_1_width=self.HIDDEN_1_WIDTH,
|
||||||
|
hidden_2_width=self.HIDDEN_2_WIDTH)
|
||||||
|
|
||||||
def train_model(self, training_dir, entity_descr_output, trainlimit=None, devlimit=None, to_print=True):
|
def train_model(self, training_dir, entity_descr_output, trainlimit=None, devlimit=None, to_print=True):
|
||||||
# raise errors instead of runtime warnings in case of int/float overflow
|
# raise errors instead of runtime warnings in case of int/float overflow
|
||||||
|
@ -81,9 +86,10 @@ class EL_Model:
|
||||||
print()
|
print()
|
||||||
print(" CUTOFF", self.CUTOFF)
|
print(" CUTOFF", self.CUTOFF)
|
||||||
print(" INPUT_DIM", self.INPUT_DIM)
|
print(" INPUT_DIM", self.INPUT_DIM)
|
||||||
|
print(" HIDDEN_1_WIDTH", self.HIDDEN_1_WIDTH)
|
||||||
print(" ENTITY_WIDTH", self.ENTITY_WIDTH)
|
print(" ENTITY_WIDTH", self.ENTITY_WIDTH)
|
||||||
print(" ARTICLE_WIDTH", self.ARTICLE_WIDTH)
|
print(" ARTICLE_WIDTH", self.ARTICLE_WIDTH)
|
||||||
print(" HIDDEN_WIDTH", self.ARTICLE_WIDTH)
|
print(" HIDDEN_2_WIDTH", self.HIDDEN_2_WIDTH)
|
||||||
print(" DROP", self.DROP)
|
print(" DROP", self.DROP)
|
||||||
print()
|
print()
|
||||||
|
|
||||||
|
@ -187,34 +193,34 @@ class EL_Model:
|
||||||
else:
|
else:
|
||||||
return [float(1.0) if random.uniform(0,1) > self.CUTOFF else float(0.0) for e in entities]
|
return [float(1.0) if random.uniform(0,1) > self.CUTOFF else float(0.0) for e in entities]
|
||||||
|
|
||||||
def _build_cnn(self, hidden_entity_width, hidden_article_width):
|
def _build_cnn(self, in_width, entity_width, article_width, hidden_1_width, hidden_2_width):
|
||||||
with Model.define_operators({">>": chain, "|": concatenate, "**": clone}):
|
with Model.define_operators({">>": chain, "|": concatenate, "**": clone}):
|
||||||
self.entity_encoder = self._encoder(in_width=self.INPUT_DIM, hidden_width=hidden_entity_width)
|
self.entity_encoder = self._encoder(in_width=in_width, hidden_with=hidden_1_width, end_width=entity_width)
|
||||||
self.article_encoder = self._encoder(in_width=self.INPUT_DIM, hidden_width=hidden_article_width)
|
self.article_encoder = self._encoder(in_width=in_width, hidden_with=hidden_1_width, end_width=article_width)
|
||||||
|
|
||||||
nr_i = hidden_entity_width + hidden_article_width
|
in_width = entity_width + article_width
|
||||||
nr_o = self.HIDDEN_WIDTH
|
out_width = hidden_2_width
|
||||||
|
|
||||||
self.model = Affine(nr_o, nr_i) \
|
self.model = Affine(out_width, in_width) \
|
||||||
>> LN(Maxout(nr_o, nr_o)) \
|
>> LN(Maxout(out_width, out_width)) \
|
||||||
>> Affine(1, nr_o) \
|
>> Affine(1, out_width) \
|
||||||
>> logistic
|
>> logistic
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _encoder(in_width, hidden_width):
|
def _encoder(in_width, hidden_with, end_width):
|
||||||
conv_depth = 2
|
conv_depth = 2
|
||||||
cnn_maxout_pieces = 3
|
cnn_maxout_pieces = 3
|
||||||
|
|
||||||
with Model.define_operators({">>": chain}):
|
with Model.define_operators({">>": chain}):
|
||||||
convolution = Residual((ExtractWindow(nW=1) >> LN(Maxout(in_width, in_width * 3, pieces=cnn_maxout_pieces))))
|
convolution = Residual((ExtractWindow(nW=1) >> LN(Maxout(hidden_with, hidden_with * 3, pieces=cnn_maxout_pieces))))
|
||||||
|
|
||||||
encoder = SpacyVectors \
|
encoder = SpacyVectors \
|
||||||
>> with_flatten(LN(Maxout(in_width, in_width)) >> convolution ** conv_depth, pad=conv_depth) \
|
>> with_flatten(LN(Maxout(hidden_with, in_width)) >> convolution ** conv_depth, pad=conv_depth) \
|
||||||
>> flatten_add_lengths \
|
>> flatten_add_lengths \
|
||||||
>> ParametricAttention(in_width)\
|
>> ParametricAttention(hidden_with)\
|
||||||
>> Pooling(mean_pool) \
|
>> Pooling(mean_pool) \
|
||||||
>> Residual(zero_init(Maxout(in_width, in_width))) \
|
>> Residual(zero_init(Maxout(hidden_with, hidden_with))) \
|
||||||
>> zero_init(Affine(hidden_width, in_width, drop_factor=0.0))
|
>> zero_init(Affine(end_width, hidden_with, drop_factor=0.0))
|
||||||
|
|
||||||
# TODO: ReLu or LN(Maxout) ?
|
# TODO: ReLu or LN(Maxout) ?
|
||||||
# sum_pool or mean_pool ?
|
# sum_pool or mean_pool ?
|
||||||
|
|
|
@ -111,7 +111,7 @@ if __name__ == "__main__":
|
||||||
print("STEP 6: training", datetime.datetime.now())
|
print("STEP 6: training", datetime.datetime.now())
|
||||||
my_nlp = spacy.load('en_core_web_md')
|
my_nlp = spacy.load('en_core_web_md')
|
||||||
trainer = EL_Model(kb=my_kb, nlp=my_nlp)
|
trainer = EL_Model(kb=my_kb, nlp=my_nlp)
|
||||||
trainer.train_model(training_dir=TRAINING_DIR, entity_descr_output=ENTITY_DESCR, trainlimit=500, devlimit=20)
|
trainer.train_model(training_dir=TRAINING_DIR, entity_descr_output=ENTITY_DESCR, trainlimit=1000, devlimit=20)
|
||||||
print()
|
print()
|
||||||
|
|
||||||
# STEP 7: apply the EL algorithm on the dev dataset
|
# STEP 7: apply the EL algorithm on the dev dataset
|
||||||
|
|
Loading…
Reference in New Issue
Block a user