diff --git a/spacy/bn/tokenizer_exceptions.py b/spacy/bn/tokenizer_exceptions.py
index 7722c9dcc..a47b89280 100644
--- a/spacy/bn/tokenizer_exceptions.py
+++ b/spacy/bn/tokenizer_exceptions.py
@@ -27,10 +27,21 @@ ABBREVIATIONS = {
     "সে.": [
         {ORTH: "সে.", LEMMA: "সেলসিয়াস"},
     ],
-    "কি.মি": [
-        {ORTH: "কি.মি", LEMMA: "কিলোমিটার"},
+    "কি.মি.": [
         {ORTH: "কি.মি.", LEMMA: "কিলোমিটার"},
     ],
+    "কি.মি": [
+        {ORTH: "কি.মি", LEMMA: "কিলোমিটার"},
+    ],
+    "সে.মি.": [
+        {ORTH: "সে.মি.", LEMMA: "সেন্টিমিটার"},
+    ],
+    "সে.মি": [
+        {ORTH: "সে.মি", LEMMA: "সেন্টিমিটার"},
+    ],
+    "মি.লি.": [
+        {ORTH: "মি.লি.", LEMMA: "মিলিলিটার"},
+    ]
 }
 
 TOKENIZER_EXCEPTIONS.update(ABBREVIATIONS)