Add function to make morphologizer model

This commit is contained in:
Matthew Honnibal 2018-09-25 10:57:59 +02:00
parent be8cf39e16
commit e6dde97295

View File

@ -483,7 +483,33 @@ class MultiSoftmax(Affine):
return output__BO, finish_update return output__BO, finish_update
def build_tagger_model(nr_class, **cfg):
    """Assemble a tagging network: a tok2vec layer chained into a softmax.

    nr_class: First argument handed to the `Softmax` output layer
        (presumably the number of tag classes -- confirm against callers).
    **cfg: Optional settings read by this function:
        token_vector_width: Used as the second argument of `Softmax` and the
            first positional argument of `Tok2Vec`. When absent, the value
            comes from `util.env_opt('token_vector_width', 128)`.
        tok2vec: Ready-made layer used instead of building a new `Tok2Vec`.
        pretrained_vectors: Forwarded to `Tok2Vec` when one is built.
        subword_features: Forwarded to `Tok2Vec` when one is built
            (defaults to True).
    RETURNS: The chained model, with `nI` set to None and the two sub-layers
        exposed as `model.tok2vec` and `model.softmax`.
    """
    # Looked up unconditionally, even if a prebuilt tok2vec leaves it unused.
    n_embed = util.env_opt('embed_size', 7000)
    # Only consult the environment option when the caller gave no width.
    width = (cfg['token_vector_width'] if 'token_vector_width' in cfg
             else util.env_opt('token_vector_width', 128))
    vectors = cfg.get('pretrained_vectors')
    use_subwords = cfg.get('subword_features', True)
    with Model.define_operators({'>>': chain, '+': add}):
        # A caller-supplied tok2vec takes precedence over building one.
        encoder = cfg['tok2vec'] if 'tok2vec' in cfg else Tok2Vec(
            width, n_embed,
            subword_features=use_subwords,
            pretrained_vectors=vectors)
        output_layer = with_flatten(Softmax(nr_class, width))
        # '>>' is bound to `chain` by the operator mapping above.
        model = encoder >> output_layer
    model.nI = None
    model.tok2vec = encoder
    model.softmax = output_layer
    return model
def build_morphologizer_model(class_nums, **cfg):
embed_size = util.env_opt('embed_size', 7000) embed_size = util.env_opt('embed_size', 7000)
if 'token_vector_width' in cfg: if 'token_vector_width' in cfg:
token_vector_width = cfg['token_vector_width'] token_vector_width = cfg['token_vector_width']