From 9fc8392b3827d009e7b54197e84f2d5b15e72cdf Mon Sep 17 00:00:00 2001
From: Wannaphong Phatthiyaphaibun
Date: Wed, 7 Oct 2020 16:12:01 +0700
Subject: [PATCH] Add Thai tag map (LST20 Corpus) (#6163)

* Add Thai tag map (LST20 Corpus)

By @korakot

* Update tag_map.py

* Update tag_map.py

* Update tag_map.py
---
 spacy/lang/th/tag_map.py | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/spacy/lang/th/tag_map.py b/spacy/lang/th/tag_map.py
index 119a2f6a0..3c0d3479b 100644
--- a/spacy/lang/th/tag_map.py
+++ b/spacy/lang/th/tag_map.py
@@ -16,25 +16,33 @@ TAG_MAP = {
     "CMTR": {POS: NOUN},
     "CFQC": {POS: NOUN},
     "CVBL": {POS: NOUN},
+    "CL": {POS: NOUN},
+    "FX": {POS: NOUN},
+    "NN": {POS: NOUN},
     # VERB
     "VACT": {POS: VERB},
     "VSTA": {POS: VERB},
+    "VV": {POS: VERB},
     # PRON
     "PRON": {POS: PRON},
     "NPRP": {POS: PRON},
+    "PR": {POS: PRON},
     # ADJ
     "ADJ": {POS: ADJ},
     "NONM": {POS: ADJ},
     "VATT": {POS: ADJ},
     "DONM": {POS: ADJ},
+    "AJ": {POS: ADJ},
     # ADV
     "ADV": {POS: ADV},
     "ADVN": {POS: ADV},
     "ADVI": {POS: ADV},
     "ADVP": {POS: ADV},
     "ADVS": {POS: ADV},
-    # INT
+    "AV": {POS: ADV},
+    # INTJ
     "INT": {POS: INTJ},
+    "IJ": {POS: INTJ},
     # PRON
     "PROPN": {POS: PROPN},
     "PPRS": {POS: PROPN},
@@ -56,6 +64,7 @@ TAG_MAP = {
     "NCNM": {POS: NUM},
     "NLBL": {POS: NUM},
     "DCNM": {POS: NUM},
+    "NU": {POS: NUM},
     # AUX
     "AUX": {POS: AUX},
     "XVBM": {POS: AUX},
@@ -63,12 +72,15 @@ TAG_MAP = {
     "XVMM": {POS: AUX},
     "XVBB": {POS: AUX},
     "XVAE": {POS: AUX},
+    "AX": {POS: AUX},
     # ADP
     "ADP": {POS: ADP},
     "RPRE": {POS: ADP},
+    "PS": {POS: ADP},
     # CCONJ
     "CCONJ": {POS: CCONJ},
     "JCRG": {POS: CCONJ},
+    "CC": {POS: CCONJ},
     # SCONJ
     "SCONJ": {POS: SCONJ},
     "PREL": {POS: SCONJ},
@@ -82,6 +94,7 @@ TAG_MAP = {
     "AITT": {POS: PART},
     "NEG": {POS: PART},
     "EITT": {POS: PART},
+    "PA": {POS: PART},
     # PUNCT
     "PUNCT": {POS: PUNCT},
     "PUNC": {POS: PUNCT},