mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	* Prevent subtok label if not learning tokens The parser introduces the subtok label to mark tokens that should be merged during post-processing. Previously this happened even if we did not have the --learn-tokens flag set. This patch passes the config through to the parser, to prevent the problem. * Make merge_subtokens a parser post-process if learn_subtokens * Fix train script * Add test for 3830: subtok problem * Fix handling of non-subtok in parser training
		
			
				
	
	
		
			21 lines
		
	
	
		
			696 B
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			21 lines
		
	
	
		
			696 B
		
	
	
	
		
			Python
		
	
	
	
	
	
from spacy.pipeline.pipes import DependencyParser
 | 
						|
from spacy.vocab import Vocab
 | 
						|
 | 
						|
 | 
						|
def test_issue3830_no_subtok():
    """Regression test for issue 3830 (parser built without learn_tokens).

    The "subtok" label must be absent from the parser's labels both right
    after a label is added and after ``begin_training`` has run.
    """
    vocab = Vocab()
    dep_parser = DependencyParser(vocab)
    dep_parser.add_label("nsubj")

    labels_before = dep_parser.labels
    assert "subtok" not in labels_before

    # begin_training receives a callable that returns an empty list.
    dep_parser.begin_training(lambda: [])

    labels_after = dep_parser.labels
    assert "subtok" not in labels_after
 | 
						|
 | 
						|
 | 
						|
def test_issue3830_with_subtok():
    """Regression test for issue 3830 (parser built with learn_tokens=True).

    The "subtok" label is not present right after a label is added, but it
    must be present once ``begin_training`` has run.
    """
    vocab = Vocab()
    dep_parser = DependencyParser(vocab, learn_tokens=True)
    dep_parser.add_label("nsubj")

    labels_before = dep_parser.labels
    assert "subtok" not in labels_before

    # begin_training receives a callable that returns an empty list.
    dep_parser.begin_training(lambda: [])

    labels_after = dep_parser.labels
    assert "subtok" in labels_after
 |