Merge pull request #2 from jeannefukumaru/update_indonesian_tag_map

updated tag map with missing tags
This commit is contained in:
jeannefukumaru 2019-04-04 06:49:04 +08:00 committed by GitHub
commit eba4f77526
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -2,33 +2,94 @@
from __future__ import unicode_literals
from ...symbols import POS, PUNCT, ADJ, CCONJ, NUM, DET, ADV, ADP, X, VERB
from ...symbols import NOUN, PRON, AUX, SCONJ
from ...symbols import NOUN, PRON, AUX, SCONJ, INTJ, PART, PROPN
# POS explanations for Indonesian are available from https://www.aclweb.org/anthology/Y12-1014
# Maps each fine-grained Indonesian tag string to its coarse POS symbol.
#
# Notes on this table:
# * Keys are plain ASCII. Several keys used to contain a typographic dash
#   (U+2014) where every other entry uses ASCII hyphens (for example the
#   "+T--" compounds); such keys could never equal an ASCII tag, so they are
#   spelled with "-" here.
# * Stand-alone tags were spelled with one or two characters ("Z", "Z-")
#   while the compound keys use three ("R--+PS3", "D--+T--"). The
#   three-character spelling is added and the shorter spellings are kept as
#   aliases so existing lookups keep working.
# * Entries that were repeated verbatim are listed once.
# NOTE(review): "PS1+VSA" maps to PRON while "PS2+VSA" and "PS1+VSA+T--" map
# to VERB, and "G--" maps to ADV while "G--+T--" / "G--+PS3" map to PART.
# The values are left as they were; confirm against the tagset description.
TAG_MAP = {
    "NSD": {POS: NOUN},
    "Z": {POS: PUNCT},
    "Z-": {POS: PUNCT},
    "Z--": {POS: PUNCT},
    "VSA": {POS: VERB},
    "CC-": {POS: NUM},
    "R": {POS: ADP},
    "R-": {POS: ADP},
    "R--": {POS: ADP},
    "D": {POS: ADV},
    "D-": {POS: ADV},
    "D--": {POS: ADV},
    "ASP": {POS: ADJ},
    "S": {POS: SCONJ},
    "S-": {POS: SCONJ},
    "S--": {POS: SCONJ},
    "VSP": {POS: VERB},
    "H": {POS: CCONJ},
    "H-": {POS: CCONJ},
    "H--": {POS: CCONJ},
    "F": {POS: X},
    "F-": {POS: X},
    "F--": {POS: X},
    "B": {POS: DET},
    "B-": {POS: DET},
    "B--": {POS: DET},
    "CO-": {POS: NUM},
    "G": {POS: ADV},
    "G--": {POS: ADV},
    "PS3": {POS: PRON},
    "W": {POS: ADV},
    "W--": {POS: ADV},
    "O": {POS: AUX},
    "O-": {POS: AUX},
    "O--": {POS: AUX},
    "PP1": {POS: PRON},
    "ASS": {POS: ADJ},
    "PS1": {POS: PRON},
    "APP": {POS: ADJ},
    "CD-": {POS: NUM},
    "VPA": {POS: VERB},
    "VPP": {POS: VERB},
    "X--": {POS: X},
    "CO-+PS3": {POS: NUM},
    "NSD+PS3": {POS: NOUN},
    "ASP+PS3": {POS: ADJ},
    "M--": {POS: AUX},
    "VSA+PS3": {POS: VERB},
    "R--+PS3": {POS: ADP},
    "W--+T--": {POS: ADV},
    "PS2": {POS: PRON},
    "NSD+PS1": {POS: NOUN},
    "PP3": {POS: PRON},
    "VSA+T--": {POS: VERB},
    "D--+T--": {POS: ADV},
    "VSP+PS3": {POS: VERB},
    "F--+PS3": {POS: X},
    "M--+T--": {POS: AUX},
    "F--+T--": {POS: X},
    "PUNCT": {POS: PUNCT},
    "PROPN": {POS: PROPN},
    "I--": {POS: INTJ},
    "S--+PS3": {POS: SCONJ},
    "ASP+T--": {POS: ADJ},
    "CC-+PS3": {POS: NUM},
    "NSD+PS2": {POS: NOUN},
    "B--+T--": {POS: DET},
    "H--+T--": {POS: CCONJ},
    "VSA+PS2": {POS: VERB},
    "NSF": {POS: NOUN},
    "PS1+VSA": {POS: PRON},
    "NPD": {POS: NOUN},
    "PP2": {POS: PRON},
    "VSA+PS1": {POS: VERB},
    "T--": {POS: PART},
    "NSM": {POS: NOUN},
    "NUM": {POS: NUM},
    "ASP+PS2": {POS: ADJ},
    "G--+T--": {POS: PART},
    "D--+PS3": {POS: ADV},
    "R--+PS2": {POS: ADP},
    "NSM+PS3": {POS: NOUN},
    "VSP+T--": {POS: VERB},
    "M--+PS3": {POS: AUX},
    "ASS+PS3": {POS: ADJ},
    "G--+PS3": {POS: PART},
    "F--+PS1": {POS: X},
    "NSD+T--": {POS: NOUN},
    "PP1+T--": {POS: PRON},
    "B--+PS3": {POS: DET},
    "NOUN": {POS: NOUN},
    "NPD+PS3": {POS: NOUN},
    "R--+PS1": {POS: ADP},
    "F--+PS2": {POS: X},
    "CD-+PS3": {POS: NUM},
    "PS1+VSA+T--": {POS: VERB},
    "PS2+VSA": {POS: VERB},
    "VERB": {POS: VERB},
    "CC-+T--": {POS: NUM},
    "NPD+PS2": {POS: NOUN},
    "D--+PS2": {POS: ADV},
    "PP3+T--": {POS: PRON},
}