Add Thai tag map (LST20 Corpus) (#6163)

* Add Thai tag map (LST20 Corpus)

By @korakot

* Update tag_map.py

* Update tag_map.py

* Update tag_map.py
This commit is contained in:
Wannaphong Phatthiyaphaibun 2020-10-07 16:12:01 +07:00 committed by GitHub
parent 7e821c2776
commit 9fc8392b38
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -16,25 +16,33 @@ TAG_MAP = {
"CMTR": {POS: NOUN}, "CMTR": {POS: NOUN},
"CFQC": {POS: NOUN}, "CFQC": {POS: NOUN},
"CVBL": {POS: NOUN}, "CVBL": {POS: NOUN},
"CL": {POS: NOUN},
"FX": {POS: NOUN},
"NN": {POS: NOUN},
# VERB # VERB
"VACT": {POS: VERB}, "VACT": {POS: VERB},
"VSTA": {POS: VERB}, "VSTA": {POS: VERB},
"VV": {POS: VERB},
# PRON # PRON
"PRON": {POS: PRON}, "PRON": {POS: PRON},
"NPRP": {POS: PRON}, "NPRP": {POS: PRON},
"PR": {POS: PRON},
# ADJ # ADJ
"ADJ": {POS: ADJ}, "ADJ": {POS: ADJ},
"NONM": {POS: ADJ}, "NONM": {POS: ADJ},
"VATT": {POS: ADJ}, "VATT": {POS: ADJ},
"DONM": {POS: ADJ}, "DONM": {POS: ADJ},
"AJ": {POS: ADJ},
# ADV # ADV
"ADV": {POS: ADV}, "ADV": {POS: ADV},
"ADVN": {POS: ADV}, "ADVN": {POS: ADV},
"ADVI": {POS: ADV}, "ADVI": {POS: ADV},
"ADVP": {POS: ADV}, "ADVP": {POS: ADV},
"ADVS": {POS: ADV}, "ADVS": {POS: ADV},
# INT "AV": {POS: ADV},
# INTJ
"INT": {POS: INTJ}, "INT": {POS: INTJ},
"IJ": {POS: INTJ},
# PROPN # PROPN
"PROPN": {POS: PROPN}, "PROPN": {POS: PROPN},
"PPRS": {POS: PROPN}, "PPRS": {POS: PROPN},
@ -56,6 +64,7 @@ TAG_MAP = {
"NCNM": {POS: NUM}, "NCNM": {POS: NUM},
"NLBL": {POS: NUM}, "NLBL": {POS: NUM},
"DCNM": {POS: NUM}, "DCNM": {POS: NUM},
"NU": {POS: NUM},
# AUX # AUX
"AUX": {POS: AUX}, "AUX": {POS: AUX},
"XVBM": {POS: AUX}, "XVBM": {POS: AUX},
@ -63,12 +72,15 @@ TAG_MAP = {
"XVMM": {POS: AUX}, "XVMM": {POS: AUX},
"XVBB": {POS: AUX}, "XVBB": {POS: AUX},
"XVAE": {POS: AUX}, "XVAE": {POS: AUX},
"AX": {POS: AUX},
# ADP # ADP
"ADP": {POS: ADP}, "ADP": {POS: ADP},
"RPRE": {POS: ADP}, "RPRE": {POS: ADP},
"PS": {POS: ADP},
# CCONJ # CCONJ
"CCONJ": {POS: CCONJ}, "CCONJ": {POS: CCONJ},
"JCRG": {POS: CCONJ}, "JCRG": {POS: CCONJ},
"CC": {POS: CCONJ},
# SCONJ # SCONJ
"SCONJ": {POS: SCONJ}, "SCONJ": {POS: SCONJ},
"PREL": {POS: SCONJ}, "PREL": {POS: SCONJ},
@ -82,6 +94,7 @@ TAG_MAP = {
"AITT": {POS: PART}, "AITT": {POS: PART},
"NEG": {POS: PART}, "NEG": {POS: PART},
"EITT": {POS: PART}, "EITT": {POS: PART},
"PA": {POS: PART},
# PUNCT # PUNCT
"PUNCT": {POS: PUNCT}, "PUNCT": {POS: PUNCT},
"PUNC": {POS: PUNCT}, "PUNC": {POS: PUNCT},