Merge pull request #3539 from jeannefukumaru/master

Added tags previously missing from Indonesian `tag_map.py`
This commit is contained in:
Ines Montani 2019-04-04 11:57:03 +02:00 committed by GitHub
commit cd21778bef
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -2,28 +2,28 @@
from __future__ import unicode_literals
from ...symbols import POS, PUNCT, ADJ, CCONJ, NUM, DET, ADV, ADP, X, VERB
from ...symbols import NOUN, PRON, AUX, SCONJ
from ...symbols import NOUN, PRON, AUX, SCONJ, INTJ, PART, PROPN
# POS explanations for indonesian available from https://www.aclweb.org/anthology/Y12-1014
TAG_MAP = {
"NSD": {POS: NOUN},
"Z": {POS: PUNCT},
"Z--": {POS: PUNCT},
"VSA": {POS: VERB},
"CC-": {POS: NUM},
"R": {POS: ADP},
"D": {POS: ADV},
"R--": {POS: ADP},
"D--": {POS: ADV},
"ASP": {POS: ADJ},
"S": {POS: SCONJ},
"S--": {POS: SCONJ},
"VSP": {POS: VERB},
"H": {POS: CCONJ},
"F": {POS: X},
"B": {POS: DET},
"H--": {POS: CCONJ},
"F--": {POS: X},
"B--": {POS: DET},
"CO-": {POS: NUM},
"G": {POS: ADV},
"G--": {POS: ADV},
"PS3": {POS: PRON},
"W": {POS: ADV},
"O": {POS: AUX},
"W--": {POS: ADV},
"O--": {POS: AUX},
"PP1": {POS: PRON},
"ASS": {POS: ADJ},
"PS1": {POS: PRON},
@ -31,4 +31,63 @@ TAG_MAP = {
"CD-": {POS: NUM},
"VPA": {POS: VERB},
"VPP": {POS: VERB},
}
"X--": {POS: X},
"CO-+PS3": {POS: NUM},
"NSD+PS3": {POS: NOUN},
"ASP+PS3": {POS: ADJ},
"M--": {POS: AUX},
"VSA+PS3": {POS: VERB},
"R--+PS3": {POS: ADP},
"W--+T--": {POS: ADV},
"PS2": {POS:PRON},
"NSD+PS1": {POS:NOUN},
"PP3": {POS: PRON},
"VSA+T--": {POS: VERB},
"D--+T--": {POS: ADV},
"VSP+PS3": {POS: VERB},
"F--+PS3": {POS: X},
"M--+T--": {POS: AUX},
"F--+T--": {POS: X},
"PUNCT": {POS: PUNCT},
"PROPN": {POS: PROPN},
"I--": {POS: INTJ},
"S--+PS3": {POS: SCONJ},
"ASP+T--": {POS: ADJ},
"CC-+PS3": {POS: NUM},
"NSD+PS2": {POS: NOUN},
"B--+T--": {POS: DET},
"H--+T--": {POS: CCONJ},
"VSA+PS2": {POS: VERB},
"NSF": {POS: NOUN},
"PS1+VSA": {POS: PRON},
"NPD": {POS: NOUN},
"PP2": {POS:PRON},
"VSA+PS1": {POS: VERB},
"T--": {POS: PART},
"NSM": {POS: NOUN},
"NUM": {POS: NUM},
"ASP+PS2": {POS: ADJ},
"G--+T--": {POS: PART},
"D--+PS3": {POS: ADV},
"R--+PS2": {POS: ADP},
"NSM+PS3": {POS: NOUN},
"VSP+T--": {POS: VERB},
"M--+PS3": {POS: AUX},
"ASS+PS3": {POS: ADJ},
"G--+PS3": {POS: PART},
"F--+PS1": {POS: X},
"NSD+T--": {POS: NOUN},
"PP1+T--": {POS: PRON},
"B--+PS3": {POS: DET},
"NOUN": {POS: NOUN},
"NPD+PS3": {POS: NOUN},
"R--+PS1": {POS: ADP},
"F--+PS2": {POS: X},
"CD-+PS3": {POS: NUM},
"PS1+VSA+T--": {POS: VERB},
"PS2+VSA": {POS: VERB},
"VERB": {POS: VERB},
"CC-+T--": {POS: NUM},
"NPD+PS2":{POS: NOUN},
"D--+PS2":{POS: ADV},
"PP3+T--": {POS: PRON}}