Fix serialization of model options
This commit is contained in:
parent 0a9016cade
commit 40a4873b70
--- a/spacy/_ml.py
+++ b/spacy/_ml.py
@@ -474,8 +474,13 @@ def getitem(i):
         return X[i], None
     return layerize(getitem_fwd)

-def build_tagger_model(nr_class, token_vector_width, pretrained_dims=0, **cfg):
+def build_tagger_model(nr_class, **cfg):
     embed_size = util.env_opt('embed_size', 4000)
+    if 'token_vector_width' in cfg:
+        token_vector_width = cfg['token_vector_width']
+    else:
+        token_vector_width = util.env_opt('token_vector_width', 128)
+    pretrained_dims = cfg.get('pretrained_dims', 0)
     with Model.define_operators({'>>': chain, '+': add}):
         # Input: (doc, tensor) tuples
         private_tok2vec = Tok2Vec(token_vector_width, embed_size,
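The hunk above moves every optional dimension of the tagger model into the cfg dict, falling back to util.env_opt only when a key is absent. A minimal, self-contained sketch of that pattern, with env_opt roughly approximated and all other names hypothetical (this is not the spaCy implementation):

    import os

    def env_opt(name, default):
        # Rough stand-in for spacy.util.env_opt: an environment variable,
        # if set, overrides the supplied default.
        return type(default)(os.environ.get('SPACY_' + name.upper(), default))

    def build_model_sketch(nr_class, **cfg):
        # Resolve every optional dimension from cfg first, then the
        # environment, so the model can be rebuilt exactly from a
        # serialized cfg dict.
        token_vector_width = cfg.get('token_vector_width',
                                     env_opt('token_vector_width', 128))
        pretrained_dims = cfg.get('pretrained_dims', 0)
        return {'nr_class': nr_class,
                'token_vector_width': token_vector_width,
                'pretrained_dims': pretrained_dims}

    assert build_model_sketch(50)['token_vector_width'] == 128
    assert build_model_sketch(50, token_vector_width=96)['token_vector_width'] == 96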
--- a/spacy/pipeline.pyx
+++ b/spacy/pipeline.pyx
@@ -297,6 +297,7 @@ class NeuralTagger(BaseThincComponent):
         self.model = model
         self.cfg = dict(cfg)
         self.cfg.setdefault('cnn_maxout_pieces', 2)
+        self.cfg.setdefault('pretrained_dims', self.vocab.vectors.data.shape[1])

     def __call__(self, doc):
         tags = self.predict(([doc], [doc.tensor]))
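The added setdefault line records the width of the pretrained vectors in cfg at construction time. Since cfg is what gets serialized alongside the component, a derived default written into it survives a save/load round trip instead of being recomputed (possibly differently) on load. A hedged sketch of why that matters, with hypothetical names:

    import json

    class ComponentSketch:
        def __init__(self, vectors_width, **cfg):
            self.cfg = dict(cfg)
            # Write derived defaults into cfg so they are saved with the
            # component rather than recomputed on load.
            self.cfg.setdefault('cnn_maxout_pieces', 2)
            self.cfg.setdefault('pretrained_dims', vectors_width)

        def to_bytes(self):
            return json.dumps(self.cfg).encode('utf8')

        @classmethod
        def from_bytes(cls, data, vectors_width=0):
            return cls(vectors_width, **json.loads(data.decode('utf8')))

    saved = ComponentSketch(300).to_bytes()
    restored = ComponentSketch.from_bytes(saved)  # vectors not needed on load
    assert restored.cfg['pretrained_dims'] == 300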
@@ -393,15 +394,12 @@ class NeuralTagger(BaseThincComponent):
         vocab.morphology = Morphology(vocab.strings, new_tag_map,
                                       vocab.morphology.lemmatizer,
                                       exc=vocab.morphology.exc)
-        token_vector_width = pipeline[0].model.nO
         if self.model is True:
-            self.model = self.Model(self.vocab.morphology.n_tags, token_vector_width,
-                                    pretrained_dims=self.vocab.vectors_length)
+            self.model = self.Model(self.vocab.morphology.n_tags, **self.cfg)

     @classmethod
-    def Model(cls, n_tags, token_vector_width, pretrained_dims=0):
-        return build_tagger_model(n_tags, token_vector_width,
-                                  pretrained_dims)
+    def Model(cls, n_tags, **cfg):
+        return build_tagger_model(n_tags, **cfg)

     def use_params(self, params):
         with self.model.use_params(params):
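This hunk is the heart of the fix: begin_training previously pulled token_vector_width from pipeline[0].model.nO and pretrained_dims from the live vocab, values that never reached self.cfg and so were lost on save. Forwarding **cfg through a single Model classmethod keeps construction and serialization in sync. A runnable sketch of the forwarding pattern, under hypothetical names:

    def build_tagger(n_tags, **cfg):
        return {'n_tags': n_tags, **cfg}

    class TaggerSketch:
        def __init__(self, **cfg):
            self.model = True          # placeholder until a model is built
            self.cfg = dict(cfg)

        @classmethod
        def Model(cls, n_tags, **cfg):
            # Single forwarding point: no positional dimensions that could
            # drift out of sync with what gets serialized.
            return build_tagger(n_tags, **cfg)

        def begin_training(self, n_tags):
            if self.model is True:
                self.model = self.Model(n_tags, **self.cfg)

    t = TaggerSketch(token_vector_width=128, pretrained_dims=300)
    t.begin_training(50)
    assert t.model == {'n_tags': 50, 'token_vector_width': 128,
                       'pretrained_dims': 300}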
@@ -422,8 +420,7 @@ class NeuralTagger(BaseThincComponent):
             if self.model is True:
                 token_vector_width = util.env_opt('token_vector_width',
                                                   self.cfg.get('token_vector_width', 128))
-                self.model = self.Model(self.vocab.morphology.n_tags, token_vector_width,
-                                        pretrained_dims=self.vocab.vectors_length)
+                self.model = self.Model(self.vocab.morphology.n_tags, **self.cfg)
             self.model.from_bytes(b)

     def load_tag_map(b):
@@ -457,10 +454,7 @@ class NeuralTagger(BaseThincComponent):
     def from_disk(self, path, **exclude):
         def load_model(p):
             if self.model is True:
-                token_vector_width = util.env_opt('token_vector_width',
-                                                  self.cfg.get('token_vector_width', 128))
-                self.model = self.Model(self.vocab.morphology.n_tags, token_vector_width,
-                                        **self.cfg)
+                self.model = self.Model(self.vocab.morphology.n_tags, **self.cfg)
             self.model.from_bytes(p.open('rb').read())

         def load_tag_map(p):
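On the loading side (both from_bytes above and from_disk here), the placeholder model can now be rebuilt from cfg alone before the weights are read, which is why the env_opt guessing could be deleted. A sketch of that load path, all names hypothetical:

    import tempfile
    from pathlib import Path

    class LoaderSketch:
        def __init__(self, n_tags, **cfg):
            self.model = True           # placeholder until a model is built
            self.n_tags = n_tags
            self.cfg = dict(cfg)

        def Model(self, n_tags, **cfg):
            return {'n_tags': n_tags, **cfg}

        def from_disk(self, path):
            if self.model is True:
                # cfg carries every dimension, so nothing needs to be
                # re-derived from the environment at load time.
                self.model = self.Model(self.n_tags, **self.cfg)
            weights = Path(path).read_bytes()  # stands in for p.open('rb').read()
            return self

    with tempfile.NamedTemporaryFile(delete=False) as f:
        f.write(b'\x00')                       # stand-in for serialized weights
    tagger = LoaderSketch(50, token_vector_width=128).from_disk(f.name)
    assert tagger.model['token_vector_width'] == 128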
@@ -514,9 +508,8 @@ class NeuralLabeller(NeuralTagger):
                                     pretrained_dims=self.vocab.vectors_length)

     @classmethod
-    def Model(cls, n_tags, token_vector_width, pretrained_dims=0):
-        return build_tagger_model(n_tags, token_vector_width,
-                                  pretrained_dims)
+    def Model(cls, n_tags, **cfg):
+        return build_tagger_model(n_tags, **cfg)

     def get_loss(self, docs, golds, scores):
         scores = self.model.ops.flatten(scores)