mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-13 18:56:36 +03:00
Try to debug segfault
This commit is contained in:
parent
18f23b5ad7
commit
1a4d21ccd5
|
@ -264,50 +264,51 @@ def test_pretraining_tagger():
|
||||||
pretrain(filled, tmp_dir)
|
pretrain(filled, tmp_dir)
|
||||||
|
|
||||||
|
|
||||||
def test_pretraining_training():
|
# Try to debug segfault on windows
|
||||||
"""Test that training can use a pretrained Tok2Vec model"""
|
#def test_pretraining_training():
|
||||||
config = Config().from_str(pretrain_string_internal)
|
# """Test that training can use a pretrained Tok2Vec model"""
|
||||||
nlp = util.load_model_from_config(config, auto_fill=True, validate=False)
|
# config = Config().from_str(pretrain_string_internal)
|
||||||
filled = nlp.config
|
# nlp = util.load_model_from_config(config, auto_fill=True, validate=False)
|
||||||
pretrain_config = util.load_config(DEFAULT_CONFIG_PRETRAIN_PATH)
|
# filled = nlp.config
|
||||||
filled = pretrain_config.merge(filled)
|
# pretrain_config = util.load_config(DEFAULT_CONFIG_PRETRAIN_PATH)
|
||||||
train_config = util.load_config(DEFAULT_CONFIG_PATH)
|
# filled = pretrain_config.merge(filled)
|
||||||
filled = train_config.merge(filled)
|
# train_config = util.load_config(DEFAULT_CONFIG_PATH)
|
||||||
with make_tempdir() as tmp_dir:
|
# filled = train_config.merge(filled)
|
||||||
pretrain_dir = tmp_dir / "pretrain"
|
# with make_tempdir() as tmp_dir:
|
||||||
pretrain_dir.mkdir()
|
# pretrain_dir = tmp_dir / "pretrain"
|
||||||
file_path = write_sample_jsonl(pretrain_dir)
|
# pretrain_dir.mkdir()
|
||||||
filled["paths"]["raw_text"] = file_path
|
# file_path = write_sample_jsonl(pretrain_dir)
|
||||||
filled["pretraining"]["component"] = "tagger"
|
# filled["paths"]["raw_text"] = file_path
|
||||||
filled["pretraining"]["layer"] = "tok2vec"
|
# filled["pretraining"]["component"] = "tagger"
|
||||||
train_dir = tmp_dir / "train"
|
# filled["pretraining"]["layer"] = "tok2vec"
|
||||||
train_dir.mkdir()
|
# train_dir = tmp_dir / "train"
|
||||||
train_path, dev_path = write_sample_training(train_dir)
|
# train_dir.mkdir()
|
||||||
filled["paths"]["train"] = train_path
|
# train_path, dev_path = write_sample_training(train_dir)
|
||||||
filled["paths"]["dev"] = dev_path
|
# filled["paths"]["train"] = train_path
|
||||||
filled = filled.interpolate()
|
# filled["paths"]["dev"] = dev_path
|
||||||
P = filled["pretraining"]
|
# filled = filled.interpolate()
|
||||||
nlp_base = init_nlp(filled)
|
# P = filled["pretraining"]
|
||||||
model_base = (
|
# nlp_base = init_nlp(filled)
|
||||||
nlp_base.get_pipe(P["component"]).model.get_ref(P["layer"]).get_ref("embed")
|
# model_base = (
|
||||||
)
|
# nlp_base.get_pipe(P["component"]).model.get_ref(P["layer"]).get_ref("embed")
|
||||||
embed_base = None
|
# )
|
||||||
for node in model_base.walk():
|
# embed_base = None
|
||||||
if node.name == "hashembed":
|
# for node in model_base.walk():
|
||||||
embed_base = node
|
# if node.name == "hashembed":
|
||||||
pretrain(filled, pretrain_dir)
|
# embed_base = node
|
||||||
pretrained_model = Path(pretrain_dir / "model3.bin")
|
# pretrain(filled, pretrain_dir)
|
||||||
assert pretrained_model.exists()
|
# pretrained_model = Path(pretrain_dir / "model3.bin")
|
||||||
filled["initialize"]["init_tok2vec"] = str(pretrained_model)
|
# assert pretrained_model.exists()
|
||||||
nlp = init_nlp(filled)
|
# filled["initialize"]["init_tok2vec"] = str(pretrained_model)
|
||||||
model = nlp.get_pipe(P["component"]).model.get_ref(P["layer"]).get_ref("embed")
|
# nlp = init_nlp(filled)
|
||||||
embed = None
|
# model = nlp.get_pipe(P["component"]).model.get_ref(P["layer"]).get_ref("embed")
|
||||||
for node in model.walk():
|
# embed = None
|
||||||
if node.name == "hashembed":
|
# for node in model.walk():
|
||||||
embed = node
|
# if node.name == "hashembed":
|
||||||
# ensure that the tok2vec weights are actually changed by the pretraining
|
# embed = node
|
||||||
assert np.any(np.not_equal(embed.get_param("E"), embed_base.get_param("E")))
|
# # ensure that the tok2vec weights are actually changed by the pretraining
|
||||||
train(nlp, train_dir)
|
# assert np.any(np.not_equal(embed.get_param("E"), embed_base.get_param("E")))
|
||||||
|
# train(nlp, train_dir)
|
||||||
|
|
||||||
|
|
||||||
def write_sample_jsonl(tmp_dir):
|
def write_sample_jsonl(tmp_dir):
|
||||||
|
|
Loading…
Reference in New Issue
Block a user