update tokenizer

This commit is contained in:
shuvanon 2017-03-08 17:29:39 +06:00
parent 45bc78461c
commit 85438aee1b

View File

@@ -35,6 +35,9 @@ ABBREVIATIONS = {
{ORTH: "সে.মি", LEMMA: "সেন্টিমিটার"},
{ORTH: "সে.মি.", LEMMA: "সেন্টিমিটার"},
],
"মি.লি.": [
{ORTH: "মি.লি.", LEMMA: "মিলিলিটার"},
]
}
TOKENIZER_EXCEPTIONS.update(ABBREVIATIONS)