mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	Merge pull request #1 from jeannefukumaru/added-indonesian-tag-map
Added indonesian tag map
This commit is contained in:
		
						commit
						99e04c4ce2
					
				| 
						 | 
					@ -8,6 +8,7 @@ from .norm_exceptions import NORM_EXCEPTIONS
 | 
				
			||||||
from .lemmatizer import LOOKUP
 | 
					from .lemmatizer import LOOKUP
 | 
				
			||||||
from .lex_attrs import LEX_ATTRS
 | 
					from .lex_attrs import LEX_ATTRS
 | 
				
			||||||
from .syntax_iterators import SYNTAX_ITERATORS
 | 
					from .syntax_iterators import SYNTAX_ITERATORS
 | 
				
			||||||
 | 
					from .tag_map import TAG_MAP
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from ..tokenizer_exceptions import BASE_EXCEPTIONS
 | 
					from ..tokenizer_exceptions import BASE_EXCEPTIONS
 | 
				
			||||||
from ..norm_exceptions import BASE_NORMS
 | 
					from ..norm_exceptions import BASE_NORMS
 | 
				
			||||||
| 
						 | 
					@ -30,6 +31,7 @@ class IndonesianDefaults(Language.Defaults):
 | 
				
			||||||
    infixes = TOKENIZER_INFIXES
 | 
					    infixes = TOKENIZER_INFIXES
 | 
				
			||||||
    syntax_iterators = SYNTAX_ITERATORS
 | 
					    syntax_iterators = SYNTAX_ITERATORS
 | 
				
			||||||
    lemma_lookup = LOOKUP
 | 
					    lemma_lookup = LOOKUP
 | 
				
			||||||
 | 
					    tag_map = TAG_MAP
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class Indonesian(Language):
 | 
					class Indonesian(Language):
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										34
									
								
								spacy/lang/id/tag_map.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										34
									
								
								spacy/lang/id/tag_map.py
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,34 @@
 | 
				
			||||||
 | 
					# coding: utf8
 | 
				
			||||||
 | 
					from __future__ import unicode_literals
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from ...symbols import POS, PUNCT, ADJ, CCONJ, NUM, DET, ADV, ADP, X, VERB
 | 
				
			||||||
 | 
					from ...symbols import NOUN, PRON, AUX, SCONJ
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# POS explanations for Indonesian are available from https://www.aclweb.org/anthology/Y12-1014
 | 
				
			||||||
 | 
					TAG_MAP = {
 | 
				
			||||||
 | 
					    "NSD": {POS: NOUN},
 | 
				
			||||||
 | 
					    "Z--": {POS: PUNCT},
 | 
				
			||||||
 | 
					    "VSA": {POS: VERB},
 | 
				
			||||||
 | 
					    "CC-": {POS: NUM},
 | 
				
			||||||
 | 
					    "R--": {POS: ADP},
 | 
				
			||||||
 | 
					    "D--": {POS: ADV},
 | 
				
			||||||
 | 
					    "ASP": {POS: ADJ},
 | 
				
			||||||
 | 
					    "S--": {POS: SCONJ},
 | 
				
			||||||
 | 
					    "VSP": {POS: VERB},
 | 
				
			||||||
 | 
					    "H--": {POS: CCONJ},
 | 
				
			||||||
 | 
					    "F--": {POS: X},
 | 
				
			||||||
 | 
					    "B--": {POS: DET},
 | 
				
			||||||
 | 
					    "CO-": {POS: NUM},
 | 
				
			||||||
 | 
					    "G--": {POS: ADV},
 | 
				
			||||||
 | 
					    "PS3": {POS: PRON},
 | 
				
			||||||
 | 
					    "W--": {POS: ADV},
 | 
				
			||||||
 | 
					    "O--": {POS: AUX},
 | 
				
			||||||
 | 
					    "PP1": {POS: PRON},
 | 
				
			||||||
 | 
					    "ASS": {POS: ADJ},
 | 
				
			||||||
 | 
					    "PS1": {POS: PRON},
 | 
				
			||||||
 | 
					    "APP": {POS: ADJ},
 | 
				
			||||||
 | 
					    "CD-": {POS: NUM},
 | 
				
			||||||
 | 
					    "VPA": {POS: VERB},
 | 
				
			||||||
 | 
					    "VPP": {POS: VERB},
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user