Fix tokenizer_exceptions imports for Thai

2026-02-03 05:56:15 +03:00 · 2017-09-26 22:14:47 +07:00 · 2017-09-26 22:14:47 +07:00 · 2ea27d07f4
commit 2ea27d07f4
parent a2bf4cc7bf
1 changed file with 1 addition and 38 deletions
--- a/spacy/lang/th/tokenizer_exceptions.py
+++ b/spacy/lang/th/tokenizer_exceptions.py
@@ -1,9 +1,7 @@
 # encoding: utf8
 from __future__ import unicode_literals

-from ..symbols import *
-from ..language_data import PRON_LEMMA
-
+from ...symbols import *

 TOKENIZER_EXCEPTIONS = {
    "ม.ค.": [
@@ -43,38 +41,3 @@ TOKENIZER_EXCEPTIONS = {
        {ORTH: "ธ.ค.", LEMMA: "ธันวาคม"}
    ]
 }
-
-
-# exceptions mapped to a single token containing only ORTH property
-# example: {"string": [{ORTH: "string"}]}
-# converted using strings_to_exc() util
-'''
-ORTH_ONLY = [
-    "a.",
-    "b.",
-    "c.",
-    "d.",
-    "e.",
-    "f.",
-    "g.",
-    "h.",
-    "i.",
-    "j.",
-    "k.",
-    "l.",
-    "m.",
-    "n.",
-    "o.",
-    "p.",
-    "q.",
-    "r.",
-    "s.",
-    "t.",
-    "u.",
-    "v.",
-    "w.",
-    "x.",
-    "y.",
-    "z."
-]
-'''