mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 09:57:26 +03:00 
			
		
		
		
	Add note on tags matching tokenization (see #1613)
This commit is contained in:
		
							parent
							
								
									ac235c0baf
								
							
						
					
					
						commit
						ec08996000
					
				| 
						 | 
					@ -30,8 +30,11 @@ TAG_MAP = {
 | 
				
			||||||
    'J': {'pos': 'ADJ'}
 | 
					    'J': {'pos': 'ADJ'}
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# Usually you'll read this in, of course. Data formats vary.
 | 
					# Usually you'll read this in, of course. Data formats vary. Ensure your
 | 
				
			||||||
# Ensure your strings are unicode.
 | 
					# strings are unicode and that the number of tags assigned matches spaCy's
 | 
				
			||||||
 | 
					# tokenization. If not, you can always add a 'words' key to the annotations
 | 
				
			||||||
 | 
					# that specifies the gold-standard tokenization, e.g.:
 | 
				
			||||||
 | 
					# ("Eatblueham", {'words': ['Eat', 'blue', 'ham'], 'tags': ['V', 'J', 'N']})
 | 
				
			||||||
TRAIN_DATA = [
 | 
					TRAIN_DATA = [
 | 
				
			||||||
    ("I like green eggs", {'tags': ['N', 'V', 'J', 'N']}),
 | 
					    ("I like green eggs", {'tags': ['N', 'V', 'J', 'N']}),
 | 
				
			||||||
    ("Eat blue ham", {'tags': ['V', 'J', 'N']})
 | 
					    ("Eat blue ham", {'tags': ['V', 'J', 'N']})
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user