Mirror of https://github.com/explosion/spaCy.git, synced 2025-01-26 01:04:34 +03:00
Fix tok2vec structure after model registry refactor (#4549)
The model registry refactor of the Tok2Vec function broke loading of models trained with the previous function, because the model tree was slightly different. Specifically, the new function wrote concatenate(norm, prefix, suffix, shape) to build the embedding layer. The previous implementation used the operator-overloading shortcut (norm | prefix | suffix | shape). The | operator is binary, so that expression actually expands to nested calls, something like concatenate(concatenate(concatenate(norm, prefix), suffix), shape). This is a different tree, so the layers iterate in a different order and the weights were loaded incorrectly.
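To make the tree mismatch concrete, here is a minimal toy sketch in plain Python (not thinc's actual API; the Layer class and names are illustrative only) showing how the binary | operator nests pairwise, while a single variadic call builds one flat node:

class Layer(object):
    """Toy stand-in for a layer; only the repr of the resulting tree matters."""
    def __init__(self, name):
        self.name = name

    def __or__(self, other):
        # `|` is binary: each use wraps exactly two operands in a new node.
        return Layer("concatenate(%s, %s)" % (self.name, other.name))

    def __repr__(self):
        return self.name


def concatenate(*layers):
    # Flat, variadic node, like the one the registry version builds.
    return Layer("concatenate(%s)" % ", ".join(l.name for l in layers))


norm, prefix, suffix, shape = (Layer(n) for n in ("norm", "prefix", "suffix", "shape"))

print(norm | prefix | suffix | shape)
# concatenate(concatenate(concatenate(norm, prefix), suffix), shape)

print(concatenate(norm, prefix, suffix, shape))
# concatenate(norm, prefix, suffix, shape)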
This commit is contained in:
parent bade60fe64
commit 9e210fa7fd
@@ -2,7 +2,7 @@ from __future__ import unicode_literals
 from thinc.api import chain, layerize, clone, concatenate, with_flatten, uniqued
 from thinc.api import noop, with_square_sequences
-from thinc.v2v import Maxout
+from thinc.v2v import Maxout, Model
 from thinc.i2v import HashEmbed, StaticVectors
 from thinc.t2t import ExtractWindow
 from thinc.misc import Residual, LayerNorm, FeatureExtracter
@@ -33,27 +33,49 @@ def Doc2Feats(config):
 
 @register_architecture("spacy.MultiHashEmbed.v1")
 def MultiHashEmbed(config):
+    # For backwards compatibility with models before the architecture registry,
+    # we have to be careful to get exactly the same model structure. One subtle
+    # trick is that when we define concatenation with the operator, the operator
+    # is actually binary associative. So when we write (a | b | c), we're actually
+    # getting concatenate(concatenate(a, b), c). That's why the implementation
+    # is a bit ugly here.
     cols = config["columns"]
     width = config["width"]
     rows = config["rows"]
 
-    tables = [HashEmbed(width, rows, column=cols.index("NORM"), name="embed_norm")]
+    norm = HashEmbed(width, rows, column=cols.index("NORM"), name="embed_norm")
     if config["use_subwords"]:
-        for feature in ["PREFIX", "SUFFIX", "SHAPE"]:
-            tables.append(
-                HashEmbed(
-                    width,
-                    rows // 2,
-                    column=cols.index(feature),
-                    name="embed_%s" % feature.lower(),
-                )
-            )
+        prefix = HashEmbed(width, rows // 2,
+                           column=cols.index("PREFIX"), name="embed_prefix")
+        suffix = HashEmbed(width, rows // 2,
+                           column=cols.index("SUFFIX"), name="embed_suffix")
+        shape = HashEmbed(width, rows // 2,
+                          column=cols.index("SHAPE"), name="embed_shape")
     if config.get("@pretrained_vectors"):
-        tables.append(make_layer(config["@pretrained_vectors"]))
+        glove = make_layer(config["@pretrained_vectors"])
     mix = make_layer(config["@mix"])
-    # This is a pretty ugly hack. Not sure what the best solution should be.
-    mix._layers[0].nI = sum(table.nO for table in tables)
-    layer = uniqued(chain(concatenate(*tables), mix), column=cols.index("ORTH"))
+    with Model.define_operators({">>": chain, "|": concatenate}):
+        if config["use_subwords"] and config["@pretrained_vectors"]:
+            mix._layers[0].nI = width * 5
+            layer = uniqued(
+                (glove | norm | prefix | suffix | shape) >> mix,
+                column=cols.index("ORTH")
+            )
+        elif config["use_subwords"]:
+            mix._layers[0].nI = width * 4
+            layer = uniqued(
+                (norm | prefix | suffix | shape) >> mix,
+                column=cols.index("ORTH")
+            )
+        elif config["@pretrained_vectors"]:
+            mix._layers[0].nI = width * 2
+            embed = uniqued(
+                (glove | norm) >> mix,
+                column=cols.index("ORTH"),
+            )
+        else:
+            embed = norm
     layer.cfg = config
     return layer
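A note on the mix._layers[0].nI bookkeeping in the new branches: the mixing layer's input width has to equal the combined width of the concatenated tables, and each table (including the pretrained-vectors layer) outputs width dimensions, so the branches set it to width * 5, width * 4, and width * 2; the final else branch uses the norm table alone and skips the mix layer. A small illustrative helper (hypothetical, not part of spaCy) that reproduces that arithmetic:

def mix_input_width(width, use_subwords, use_pretrained_vectors):
    # One table for NORM, three more for PREFIX/SUFFIX/SHAPE, and one for the
    # pretrained vectors -- each contributes `width` output dimensions.
    n_tables = 1 + (3 if use_subwords else 0) + (1 if use_pretrained_vectors else 0)
    return width * n_tables

# The three branches in the diff above (96 is just an example width):
assert mix_input_width(96, use_subwords=True, use_pretrained_vectors=True) == 96 * 5
assert mix_input_width(96, use_subwords=True, use_pretrained_vectors=False) == 96 * 4
assert mix_input_width(96, use_subwords=False, use_pretrained_vectors=True) == 96 * 2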