mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 18:06:29 +03:00
Merge pull request #1 from jeannefukumaru/added-indonesian-tag-map
Added Indonesian tag map
This commit is contained in:
commit
99e04c4ce2
|
@ -8,6 +8,7 @@ from .norm_exceptions import NORM_EXCEPTIONS
|
||||||
from .lemmatizer import LOOKUP
|
from .lemmatizer import LOOKUP
|
||||||
from .lex_attrs import LEX_ATTRS
|
from .lex_attrs import LEX_ATTRS
|
||||||
from .syntax_iterators import SYNTAX_ITERATORS
|
from .syntax_iterators import SYNTAX_ITERATORS
|
||||||
|
from .tag_map import TAG_MAP
|
||||||
|
|
||||||
from ..tokenizer_exceptions import BASE_EXCEPTIONS
|
from ..tokenizer_exceptions import BASE_EXCEPTIONS
|
||||||
from ..norm_exceptions import BASE_NORMS
|
from ..norm_exceptions import BASE_NORMS
|
||||||
|
@ -30,6 +31,7 @@ class IndonesianDefaults(Language.Defaults):
|
||||||
infixes = TOKENIZER_INFIXES
|
infixes = TOKENIZER_INFIXES
|
||||||
syntax_iterators = SYNTAX_ITERATORS
|
syntax_iterators = SYNTAX_ITERATORS
|
||||||
lemma_lookup = LOOKUP
|
lemma_lookup = LOOKUP
|
||||||
|
tag_map = TAG_MAP
|
||||||
|
|
||||||
|
|
||||||
class Indonesian(Language):
|
class Indonesian(Language):
|
||||||
|
|
34
spacy/lang/id/tag_map.py
Normal file
34
spacy/lang/id/tag_map.py
Normal file
|
@ -0,0 +1,34 @@
|
||||||
|
# coding: utf8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
from ...symbols import POS, PUNCT, ADJ, CCONJ, NUM, DET, ADV, ADP, X, VERB
|
||||||
|
from ...symbols import NOUN, PRON, AUX, SCONJ
|
||||||
|
|
||||||
|
|
||||||
|
# POS explanations for Indonesian are available from https://www.aclweb.org/anthology/Y12-1014
|
||||||
|
# Maps the treebank's fine-grained tags to coarse-grained universal POS symbols.
#
# Every key is a fixed-width, three-character ASCII code. Positions that carry
# no information are padded with the ASCII hyphen-minus ("-"), e.g. "Z--" or
# "CC-". Do not let an editor or renderer collapse "--" into a typographic
# en dash ("–"): such a key is only two characters long and will not match a
# hyphen-padded tag.
TAG_MAP = {
    "NSD": {POS: NOUN},
    "Z--": {POS: PUNCT},
    "VSA": {POS: VERB},
    "CC-": {POS: NUM},
    "R--": {POS: ADP},
    "D--": {POS: ADV},
    "ASP": {POS: ADJ},
    "S--": {POS: SCONJ},
    "VSP": {POS: VERB},
    "H--": {POS: CCONJ},
    "F--": {POS: X},
    "B--": {POS: DET},
    "CO-": {POS: NUM},
    "G--": {POS: ADV},
    "PS3": {POS: PRON},
    "W--": {POS: ADV},
    "O--": {POS: AUX},
    "PP1": {POS: PRON},
    "ASS": {POS: ADJ},
    "PS1": {POS: PRON},
    "APP": {POS: ADJ},
    "CD-": {POS: NUM},
    "VPA": {POS: VERB},
    "VPP": {POS: VERB},
}
|
Loading…
Reference in New Issue
Block a user