Mirror of https://github.com/explosion/spaCy.git (synced 2025-07-12 17:22:25 +03:00)

Commit 880182afdb (parent af9a30b192): Work on parser. 15 tests failing
@@ -42,7 +42,7 @@ def TransitionModel(
             "nO": None,  # Output size
             "nP": maxout_pieces,
             "nH": hidden_width,
-            "nI": tok2vec.maybe_get_dim("nO"),
+            "nI": tok2vec_projected.maybe_get_dim("nO"),
             "nF": state_tokens,
         },
         attrs={
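Note: the "nI" entry now reads its width from tok2vec_projected rather than the raw tok2vec layer. A minimal sketch of the Thinc behaviour this relies on: maybe_get_dim returns None instead of raising when a dimension is still unset, so "nI" can stay unresolved until the projected layer is initialized (Linear here is just a stand-in layer, not the layer used in the diff).

from thinc.api import Linear

layer = Linear()  # no dimensions set yet
assert layer.maybe_get_dim("nO") is None  # unset dims come back as None
layer.set_dim("nO", 64)
assert layer.maybe_get_dim("nO") == 64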
@@ -69,6 +69,9 @@ def resize_output(model: Model, new_nO: int) -> Model:
     new_b[:old_nO] = old_b  # type: ignore
     for i in range(old_nO, new_nO):
         model.attrs["unseen_classes"].add(i)
+    model.set_param("upper_W", new_W)
+    model.set_param("upper_b", new_b)
+    model.set_dim("nO", new_nO, force=True)
     return model
 
 
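Note: the three added lines write the resized parameters back onto the model, which the function previously forgot to do. A numpy sketch of the overall resize pattern, assuming (as the context lines suggest) the surrounding code allocates new_W/new_b and copies the old values; names mirror the diff, not a public API:

import numpy as np

def resize_upper(old_W: np.ndarray, old_b: np.ndarray, new_nO: int):
    # Grow the output layer: copy learned rows, zero-init the new ones,
    # and record the new class indices as "unseen" so they can be masked.
    old_nO, nH = old_W.shape
    new_W = np.zeros((new_nO, nH), dtype=old_W.dtype)
    new_b = np.zeros((new_nO,), dtype=old_b.dtype)
    new_W[:old_nO] = old_W
    new_b[:old_nO] = old_b
    unseen_classes = set(range(old_nO, new_nO))
    return new_W, new_b, unseen_classes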
@@ -167,9 +170,8 @@ def forward(model, docs_moves: Tuple[List[Doc], TransitionSystem], is_train: bool):
         if (d_scores[:, clas] < 0).any():
             model.attrs["unseen_classes"].remove(clas)
         d_scores *= unseen_mask
-        ids = ops.xp.concatenate(all_ids)
-        statevecs = ops.xp.concatenate(all_statevecs)
-        which = ops.xp.concatenate(all_which)
+        statevecs = ops.xp.vstack(all_statevecs)
+        which = ops.xp.vstack(all_which)
         # Calculate the gradients for the parameters of the upper layer.
         model.inc_grad("upper_b", d_scores.sum(axis=0))
         model.inc_grad("upper_W", model.ops.gemm(d_scores, statevecs, trans1=True))
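Note: all_statevecs and all_which are per-batch lists of 2d arrays, so xp.vstack stacks them into single (n_states, ...) arrays before the gradient gemm; ids is no longer flattened here because the IDs are handled later. In plain numpy, model.ops.gemm(A, B, trans1=True) is A.T @ B, giving the usual affine-layer gradients:

import numpy as np

d_scores = np.random.rand(32, 10)   # (n_states, nO): gradient w.r.t. scores
statevecs = np.random.rand(32, 64)  # (n_states, nH): upper-layer inputs

d_upper_b = d_scores.sum(axis=0)    # bias gradient, shape (nO,)
d_upper_W = d_scores.T @ statevecs  # weight gradient, shape (nO, nH)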
@@ -178,8 +180,12 @@ def forward(model, docs_moves: Tuple[List[Doc], TransitionSystem], is_train: bool):
         # Backprop through the maxout activation
         d_preacts = model.ops.backprop_maxout(d_statevecs, which, model.get_dim("nP"))
         # We don't need to backprop the summation, because we pass back the IDs instead
-        d_tokvecs = backprop_feats((d_preacts, ids))
-        return (backprop_tok2vec(d_tokvecs), None)
+        d_state_features = backprop_feats((d_preacts, all_ids))
+        ids1d = model.ops.xp.vstack(all_ids).flatten()
+        d_state_features = d_state_features.reshape((ids1d.size, -1))
+        d_tokvecs = model.ops.alloc((tokvecs.shape[0] + 1, tokvecs.shape[1]))
+        model.ops.scatter_add(d_tokvecs, ids1d, d_state_features)
+        return (backprop_tok2vec(d_tokvecs[:-1]), None)
 
     return (states, all_scores), backprop_parser
 
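Note: this is the substantive change in the backward pass. Instead of expecting backprop_feats to produce token-vector gradients directly, the per-state feature gradients are scattered back onto token rows by ID. A numpy sketch of the mechanism, assuming (as the +1 row and the final [:-1] slice suggest) that missing tokens carry id -1, which wraps around to the extra last row and is then discarded; np.add.at plays the role of ops.scatter_add here:

import numpy as np

n_tokens, width = 5, 4
ids1d = np.array([0, 2, 2, 4, -1])           # token ids; -1 marks padding
d_state_features = np.ones((ids1d.size, width))

d_tokvecs = np.zeros((n_tokens + 1, width))  # extra row catches id -1
np.add.at(d_tokvecs, ids1d, d_state_features)
d_tokvecs = d_tokvecs[:-1]                   # drop the padding row
# token 2 was used twice, so its gradient accumulated twice
assert (d_tokvecs[2] == 2.0).all()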
@@ -200,6 +206,7 @@ def _forward_precomputable_affine(model, X: Floats2d, is_train: bool):
     nH = model.get_dim("nH")
     nP = model.get_dim("nP")
     nI = model.get_dim("nI")
+    assert X.shape == (X.shape[0], nI), X.shape
     Yf_ = model.ops.gemm(X, model.ops.reshape2f(W, nF * nH * nP, nI), trans2=True)
     Yf = model.ops.reshape4f(Yf_, Yf_.shape[0], nF, nH, nP)
     Yf = model.ops.xp.vstack((Yf, pad))
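Note: the new assert pins down the contract that X is a (n_tokens, nI) matrix. For context, the point of the precomputed affine is to run one big gemm over all tokens, then build each state's hidden activations by indexing and summing rather than by a fresh matrix multiply per state. A hedged numpy sketch (shapes follow the diff; the gather-and-sum step is simplified and inferred from the surrounding comments):

import numpy as np

nF, nH, nP, nI = 3, 8, 2, 16         # features, hidden, maxout pieces, input
X = np.random.rand(10, nI)           # 10 token vectors
W = np.random.rand(nF, nH, nP, nI)

# One gemm for the whole batch of tokens...
Yf = (X @ W.reshape(nF * nH * nP, nI).T).reshape(10, nF, nH, nP)

# ...then per-state activations are a gather plus a sum over features.
ids = np.array([[0, 3, 7], [1, 3, 9]])           # token ids per state
state_acts = Yf[ids, np.arange(nF)].sum(axis=1)  # shape (2, nH, nP)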
@@ -226,19 +233,13 @@ def _forward_precomputable_affine(model, X: Floats2d, is_train: bool):
         model.inc_grad(
             "lower_pad", _backprop_precomputable_affine_padding(model, dY, ids)
         )
-        print("X", X.shape)
-        print("ids", ids.shape)
-        print("dims", "nF", "nI")
-        print("X[ids]", X[ids].shape)
-        Xf = model.ops.reshape2f(X[ids], ids.shape[0], nF * nI)
-
         model.inc_grad("lower_b", dY.sum(axis=0))  # type: ignore
         dY = model.ops.reshape2f(dY, dY.shape[0], nH * nP)
 
         Wopfi = W.transpose((1, 2, 0, 3))
         Wopfi = Wopfi.reshape((nH * nP, nF * nI))
         dXf = model.ops.gemm(dY.reshape((dY.shape[0], nH * nP)), Wopfi)
+        ids1d = model.ops.xp.vstack(ids).flatten()
+        Xf = model.ops.reshape2f(X[ids1d], -1, nF * nI)
         dWopfi = model.ops.gemm(dY, Xf, trans1=True)
         dWopfi = dWopfi.reshape((nH, nP, nF, nI))
         # (o, p, f, i) --> (f, o, p, i)
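Note: besides dropping the debug prints, the fix rebuilds Xf from the flattened ids so the weight gradient lines up with how the forward pass gathered its inputs. A numpy equivalent of the gemm at the end of the hunk (shapes are illustrative):

import numpy as np

nF, nH, nP, nI, n_states = 3, 8, 2, 16, 4
X = np.random.rand(10, nI)
ids = np.random.randint(0, 10, size=(n_states, nF))
dY = np.random.rand(n_states, nH * nP)

ids1d = ids.flatten()
Xf = X[ids1d].reshape(n_states, nF * nI)      # gathered inputs, one row per state
dWopfi = (dY.T @ Xf).reshape(nH, nP, nF, nI)  # then transposed to (f, o, p, i)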
@@ -250,6 +251,7 @@ def _forward_precomputable_affine(model, X: Floats2d, is_train: bool):
 
 
 def _backprop_precomputable_affine_padding(model, dY, ids):
+    ids = model.ops.xp.vstack(ids)
     nB = dY.shape[0]
     nF = model.get_dim("nF")
     nP = model.get_dim("nP")
@@ -263,6 +263,7 @@ class Parser(TrainablePipe):
         best_costs = costs.min(axis=1, keepdims=True)
         gscores = scores.copy()
         min_score = scores.min()
+        assert costs.shape == scores.shape, (costs.shape, scores.shape)
         gscores[costs > best_costs] = min_score
         max_ = scores.max(axis=1, keepdims=True)
         gmax = gscores.max(axis=1, keepdims=True)
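Note: the added assert guards the boolean masking just below it, since costs > best_costs must broadcast against gscores element for element. A numpy sketch of what the masking does: every action whose cost exceeds the best attainable cost is demoted to the minimum score, so the subsequent "gold" softmax only spreads probability over optimal actions:

import numpy as np

scores = np.array([[2.0, 1.0, 0.5],
                   [0.1, 3.0, 2.0]])
costs = np.array([[0.0, 1.0, 0.0],    # actions 0 and 2 are optimal
                  [2.0, 0.0, 0.0]])   # actions 1 and 2 are optimal

best_costs = costs.min(axis=1, keepdims=True)
gscores = scores.copy()
gscores[costs > best_costs] = scores.min()  # demote non-optimal actions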
@@ -120,33 +120,11 @@ width = ${components.tok2vec.model.width}
 
 parser_config_string_upper = """
 [model]
-@architectures = "spacy.TransitionBasedParser.v2"
+@architectures = "spacy.TransitionBasedParser.v3"
 state_type = "parser"
 extra_state_tokens = false
 hidden_width = 66
 maxout_pieces = 2
-use_upper = true
-
-[model.tok2vec]
-@architectures = "spacy.HashEmbedCNN.v1"
-pretrained_vectors = null
-width = 333
-depth = 4
-embed_size = 5555
-window_size = 1
-maxout_pieces = 7
-subword_features = false
-"""
-
-
-parser_config_string_no_upper = """
-[model]
-@architectures = "spacy.TransitionBasedParser.v2"
-state_type = "parser"
-extra_state_tokens = false
-hidden_width = 66
-maxout_pieces = 2
-use_upper = false
 
 [model.tok2vec]
 @architectures = "spacy.HashEmbedCNN.v1"
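Note: with the use_upper setting gone from TransitionBasedParser.v3, the separate no_upper config string is deleted and only one config remains. For reference, the tests consume these strings roughly like this (a sketch; the exact test body may differ):

from thinc.api import Config
from spacy.lang.en import English

model_config = Config().from_str(parser_config_string_upper)
nlp = English()
parser = nlp.add_pipe("parser", config=model_config)
parser.add_label("nsubj")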
@@ -177,7 +155,6 @@ def my_parser():
         extra_state_tokens=True,
         hidden_width=65,
         maxout_pieces=5,
-        use_upper=True,
     )
     return parser
 
@@ -264,15 +241,14 @@ def test_serialize_custom_nlp():
         nlp.to_disk(d)
         nlp2 = spacy.load(d)
         model = nlp2.get_pipe("parser").model
-        model.get_ref("tok2vec")
-        # check that we have the correct settings, not the default ones
-        assert model.get_ref("upper").get_dim("nI") == 65
-        assert model.get_ref("lower").get_dim("nI") == 65
+        assert model.get_ref("tok2vec") is not None
+        assert model.has_param("lower_W")
+        assert model.has_param("upper_W")
+        assert model.has_param("lower_b")
+        assert model.has_param("upper_b")
 
 
-@pytest.mark.parametrize(
-    "parser_config_string", [parser_config_string_upper, parser_config_string_no_upper]
-)
+@pytest.mark.parametrize("parser_config_string", [parser_config_string_upper])
 def test_serialize_parser(parser_config_string):
     """ Create a non-default parser config to check nlp serializes it correctly """
     nlp = English()
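Note: the updated assertions reflect the flattened model produced by the refactor: weights like lower_W and upper_W now live as named parameters directly on the transition model, so the tests probe has_param(...) instead of walking get_ref("upper")/get_ref("lower"). A hedged helper showing the new style of check (not part of the diff):

from thinc.api import Model

def assert_parser_params(model: Model) -> None:
    # After the refactor, weights are stored as named params on one Model.
    for name in ("lower_W", "lower_b", "upper_W", "upper_b"):
        assert model.has_param(name), f"missing parameter: {name}"
    assert model.get_ref("tok2vec") is not None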
@@ -285,11 +261,11 @@ def test_serialize_parser(parser_config_string):
         nlp.to_disk(d)
         nlp2 = spacy.load(d)
         model = nlp2.get_pipe("parser").model
-        model.get_ref("tok2vec")
-        # check that we have the correct settings, not the default ones
-        if model.attrs["has_upper"]:
-            assert model.get_ref("upper").get_dim("nI") == 66
-        assert model.get_ref("lower").get_dim("nI") == 66
+        assert model.get_ref("tok2vec") is not None
+        assert model.has_param("lower_W")
+        assert model.has_param("upper_W")
+        assert model.has_param("lower_b")
+        assert model.has_param("upper_b")
 
 
 def test_config_nlp_roundtrip():
@@ -436,9 +412,7 @@ def test_config_auto_fill_extra_fields():
     load_model_from_config(nlp.config)
 
 
-@pytest.mark.parametrize(
-    "parser_config_string", [parser_config_string_upper, parser_config_string_no_upper]
-)
+@pytest.mark.parametrize("parser_config_string", [parser_config_string_upper])
 def test_config_validate_literal(parser_config_string):
     nlp = English()
     config = Config().from_str(parser_config_string)