diff --git a/spacy/bn/tokenizer_exceptions.py b/spacy/bn/tokenizer_exceptions.py index 7722c9dcc..169608d0b 100644 --- a/spacy/bn/tokenizer_exceptions.py +++ b/spacy/bn/tokenizer_exceptions.py @@ -31,6 +31,10 @@ ABBREVIATIONS = { {ORTH: "কি.মি", LEMMA: "কিলোমিটার"}, {ORTH: "কি.মি.", LEMMA: "কিলোমিটার"}, ], + "সে.মি": [ + {ORTH: "সে.মি", LEMMA: "সেন্টিমিটার"}, + {ORTH: "সে.মি.", LEMMA: "সেন্টিমিটার"}, + ], } TOKENIZER_EXCEPTIONS.update(ABBREVIATIONS)