Fix serialization of model options

This commit is contained in:
Matthew Honnibal 2017-09-21 13:07:26 -05:00
parent 0a9016cade
commit 40a4873b70
2 changed files with 14 additions and 16 deletions

View File

@@ -474,8 +474,13 @@ def getitem(i):
return X[i], None
return layerize(getitem_fwd)
def build_tagger_model(nr_class, token_vector_width, pretrained_dims=0, **cfg):
def build_tagger_model(nr_class, **cfg):
embed_size = util.env_opt('embed_size', 4000)
if 'token_vector_width' in cfg:
token_vector_width = cfg['token_vector_width']
else:
token_vector_width = util.env_opt('token_vector_width', 128)
pretrained_dims = cfg.get('pretrained_dims', 0)
with Model.define_operators({'>>': chain, '+': add}):
# Input: (doc, tensor) tuples
private_tok2vec = Tok2Vec(token_vector_width, embed_size,

View File

@@ -297,6 +297,7 @@ class NeuralTagger(BaseThincComponent):
self.model = model
self.cfg = dict(cfg)
self.cfg.setdefault('cnn_maxout_pieces', 2)
self.cfg.setdefault('pretrained_dims', self.vocab.vectors.data.shape[1])
def __call__(self, doc):
tags = self.predict(([doc], [doc.tensor]))
@@ -393,15 +394,12 @@ class NeuralTagger(BaseThincComponent):
vocab.morphology = Morphology(vocab.strings, new_tag_map,
vocab.morphology.lemmatizer,
exc=vocab.morphology.exc)
token_vector_width = pipeline[0].model.nO
if self.model is True:
self.model = self.Model(self.vocab.morphology.n_tags, token_vector_width,
pretrained_dims=self.vocab.vectors_length)
self.model = self.Model(self.vocab.morphology.n_tags, **self.cfg)
@classmethod
def Model(cls, n_tags, token_vector_width, pretrained_dims=0):
return build_tagger_model(n_tags, token_vector_width,
pretrained_dims)
def Model(cls, n_tags, **cfg):
return build_tagger_model(n_tags, **cfg)
def use_params(self, params):
with self.model.use_params(params):
@@ -422,8 +420,7 @@ class NeuralTagger(BaseThincComponent):
if self.model is True:
token_vector_width = util.env_opt('token_vector_width',
self.cfg.get('token_vector_width', 128))
self.model = self.Model(self.vocab.morphology.n_tags, token_vector_width,
pretrained_dims=self.vocab.vectors_length)
self.model = self.Model(self.vocab.morphology.n_tags, **self.cfg)
self.model.from_bytes(b)
def load_tag_map(b):
@@ -457,10 +454,7 @@ class NeuralTagger(BaseThincComponent):
def from_disk(self, path, **exclude):
def load_model(p):
if self.model is True:
token_vector_width = util.env_opt('token_vector_width',
self.cfg.get('token_vector_width', 128))
self.model = self.Model(self.vocab.morphology.n_tags, token_vector_width,
**self.cfg)
self.model = self.Model(self.vocab.morphology.n_tags, **self.cfg)
self.model.from_bytes(p.open('rb').read())
def load_tag_map(p):
@@ -514,9 +508,8 @@ class NeuralLabeller(NeuralTagger):
pretrained_dims=self.vocab.vectors_length)
@classmethod
def Model(cls, n_tags, token_vector_width, pretrained_dims=0):
return build_tagger_model(n_tags, token_vector_width,
pretrained_dims)
def Model(cls, n_tags, **cfg):
return build_tagger_model(n_tags, **cfg)
def get_loss(self, docs, golds, scores):
scores = self.model.ops.flatten(scores)