mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 16:07:41 +03:00 
			
		
		
		
	Try to debug segfault
This commit is contained in:
		
							parent
							
								
									18f23b5ad7
								
							
						
					
					
						commit
						1a4d21ccd5
					
				|  | @ -264,50 +264,51 @@ def test_pretraining_tagger(): | ||||||
|             pretrain(filled, tmp_dir) |             pretrain(filled, tmp_dir) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def test_pretraining_training(): | # Try to debug segfault on windows | ||||||
|     """Test that training can use a pretrained Tok2Vec model""" | #def test_pretraining_training(): | ||||||
|     config = Config().from_str(pretrain_string_internal) | #    """Test that training can use a pretrained Tok2Vec model""" | ||||||
|     nlp = util.load_model_from_config(config, auto_fill=True, validate=False) | #    config = Config().from_str(pretrain_string_internal) | ||||||
|     filled = nlp.config | #    nlp = util.load_model_from_config(config, auto_fill=True, validate=False) | ||||||
|     pretrain_config = util.load_config(DEFAULT_CONFIG_PRETRAIN_PATH) | #    filled = nlp.config | ||||||
|     filled = pretrain_config.merge(filled) | #    pretrain_config = util.load_config(DEFAULT_CONFIG_PRETRAIN_PATH) | ||||||
|     train_config = util.load_config(DEFAULT_CONFIG_PATH) | #    filled = pretrain_config.merge(filled) | ||||||
|     filled = train_config.merge(filled) | #    train_config = util.load_config(DEFAULT_CONFIG_PATH) | ||||||
|     with make_tempdir() as tmp_dir: | #    filled = train_config.merge(filled) | ||||||
|         pretrain_dir = tmp_dir / "pretrain" | #    with make_tempdir() as tmp_dir: | ||||||
|         pretrain_dir.mkdir() | #        pretrain_dir = tmp_dir / "pretrain" | ||||||
|         file_path = write_sample_jsonl(pretrain_dir) | #        pretrain_dir.mkdir() | ||||||
|         filled["paths"]["raw_text"] = file_path | #        file_path = write_sample_jsonl(pretrain_dir) | ||||||
|         filled["pretraining"]["component"] = "tagger" | #        filled["paths"]["raw_text"] = file_path | ||||||
|         filled["pretraining"]["layer"] = "tok2vec" | #        filled["pretraining"]["component"] = "tagger" | ||||||
|         train_dir = tmp_dir / "train" | #        filled["pretraining"]["layer"] = "tok2vec" | ||||||
|         train_dir.mkdir() | #        train_dir = tmp_dir / "train" | ||||||
|         train_path, dev_path = write_sample_training(train_dir) | #        train_dir.mkdir() | ||||||
|         filled["paths"]["train"] = train_path | #        train_path, dev_path = write_sample_training(train_dir) | ||||||
|         filled["paths"]["dev"] = dev_path | #        filled["paths"]["train"] = train_path | ||||||
|         filled = filled.interpolate() | #        filled["paths"]["dev"] = dev_path | ||||||
|         P = filled["pretraining"] | #        filled = filled.interpolate() | ||||||
|         nlp_base = init_nlp(filled) | #        P = filled["pretraining"] | ||||||
|         model_base = ( | #        nlp_base = init_nlp(filled) | ||||||
|             nlp_base.get_pipe(P["component"]).model.get_ref(P["layer"]).get_ref("embed") | #        model_base = ( | ||||||
|         ) | #            nlp_base.get_pipe(P["component"]).model.get_ref(P["layer"]).get_ref("embed") | ||||||
|         embed_base = None | #        ) | ||||||
|         for node in model_base.walk(): | #        embed_base = None | ||||||
|             if node.name == "hashembed": | #        for node in model_base.walk(): | ||||||
|                 embed_base = node | #            if node.name == "hashembed": | ||||||
|         pretrain(filled, pretrain_dir) | #                embed_base = node | ||||||
|         pretrained_model = Path(pretrain_dir / "model3.bin") | #        pretrain(filled, pretrain_dir) | ||||||
|         assert pretrained_model.exists() | #        pretrained_model = Path(pretrain_dir / "model3.bin") | ||||||
|         filled["initialize"]["init_tok2vec"] = str(pretrained_model) | #        assert pretrained_model.exists() | ||||||
|         nlp = init_nlp(filled) | #        filled["initialize"]["init_tok2vec"] = str(pretrained_model) | ||||||
|         model = nlp.get_pipe(P["component"]).model.get_ref(P["layer"]).get_ref("embed") | #        nlp = init_nlp(filled) | ||||||
|         embed = None | #        model = nlp.get_pipe(P["component"]).model.get_ref(P["layer"]).get_ref("embed") | ||||||
|         for node in model.walk(): | #        embed = None | ||||||
|             if node.name == "hashembed": | #        for node in model.walk(): | ||||||
|                 embed = node | #            if node.name == "hashembed": | ||||||
|         # ensure that the tok2vec weights are actually changed by the pretraining | #                embed = node | ||||||
|         assert np.any(np.not_equal(embed.get_param("E"), embed_base.get_param("E"))) | #        # ensure that the tok2vec weights are actually changed by the pretraining | ||||||
|         train(nlp, train_dir) | #        assert np.any(np.not_equal(embed.get_param("E"), embed_base.get_param("E"))) | ||||||
|  | #        train(nlp, train_dir) | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
| def write_sample_jsonl(tmp_dir): | def write_sample_jsonl(tmp_dir): | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user