mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	Add note on tags matching tokenization (see #1613)
This commit is contained in:
		
							parent
							
								
									ac235c0baf
								
							
						
					
					
						commit
						ec08996000
					
				| 
						 | 
				
			
			@ -30,8 +30,11 @@ TAG_MAP = {
 | 
			
		|||
    'J': {'pos': 'ADJ'}
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
# Usually you'll read this in, of course. Data formats vary.
 | 
			
		||||
# Ensure your strings are unicode.
 | 
			
		||||
# Usually you'll read this in, of course. Data formats vary. Ensure your
 | 
			
		||||
# strings are unicode and that the number of tags assigned matches spaCy's
 | 
			
		||||
# tokenization. If not, you can always add a 'words' key to the annotations
 | 
			
		||||
# that specifies the gold-standard tokenization, e.g.:
 | 
			
		||||
# ("Eatblueham", {'words': ['Eat', 'blue', 'ham'], 'tags': ['V', 'J', 'N']})
 | 
			
		||||
TRAIN_DATA = [
 | 
			
		||||
    ("I like green eggs", {'tags': ['N', 'V', 'J', 'N']}),
 | 
			
		||||
    ("Eat blue ham", {'tags': ['V', 'J', 'N']})
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue
	
	Block a user