From a624ae06756cb883c4a6b870f58e4bb3f39d9e45 Mon Sep 17 00:00:00 2001 From: Ines Montani Date: Wed, 22 Jul 2020 23:09:01 +0200 Subject: [PATCH] Remove POS, TAG and LEMMA from tokenizer exceptions --- spacy/lang/ar/tokenizer_exceptions.py | 46 +- spacy/lang/bn/tokenizer_exceptions.py | 26 +- spacy/lang/ca/tokenizer_exceptions.py | 44 +- spacy/lang/da/tokenizer_exceptions.py | 112 +- spacy/lang/de/tokenizer_exceptions.py | 270 +- spacy/lang/el/tokenizer_exceptions.py | 178 +- spacy/lang/en/tokenizer_exceptions.py | 475 ++- spacy/lang/es/tokenizer_exceptions.py | 36 +- spacy/lang/fa/tokenizer_exceptions.py | 3494 ++++------------- spacy/lang/fi/tokenizer_exceptions.py | 138 +- spacy/lang/fr/tokenizer_exceptions.py | 92 +- spacy/lang/ga/tokenizer_exceptions.py | 124 +- spacy/lang/id/tokenizer_exceptions.py | 52 +- spacy/lang/it/tokenizer_exceptions.py | 4 +- spacy/lang/lb/tokenizer_exceptions.py | 28 +- spacy/lang/lij/tokenizer_exceptions.py | 71 +- spacy/lang/nb/tokenizer_exceptions.py | 24 +- spacy/lang/ru/tokenizer_exceptions.py | 84 +- spacy/lang/sr/tokenizer_exceptions.py | 130 +- spacy/lang/sv/tokenizer_exceptions.py | 87 +- spacy/lang/th/tokenizer_exceptions.py | 865 ++-- spacy/lang/tl/tokenizer_exceptions.py | 20 +- spacy/lang/tokenizer_exceptions.py | 14 +- spacy/lang/tt/tokenizer_exceptions.py | 49 +- spacy/lang/uk/tokenizer_exceptions.py | 28 +- spacy/tests/lang/ar/test_exceptions.py | 1 - spacy/tests/lang/ca/test_exception.py | 2 - spacy/tests/lang/de/test_exceptions.py | 1 - spacy/tests/lang/en/test_exceptions.py | 2 - spacy/tests/lang/es/test_exception.py | 2 - spacy/tests/lang/fr/test_exceptions.py | 19 +- spacy/tests/lang/lb/test_exceptions.py | 1 - spacy/tests/regression/test_issue1501-2000.py | 2 - spacy/tests/regression/test_issue2501-3000.py | 1 - 34 files changed, 2173 insertions(+), 4349 deletions(-) diff --git a/spacy/lang/ar/tokenizer_exceptions.py b/spacy/lang/ar/tokenizer_exceptions.py index ce0f91ef7..7c385bef8 100644 --- 
a/spacy/lang/ar/tokenizer_exceptions.py +++ b/spacy/lang/ar/tokenizer_exceptions.py @@ -1,5 +1,5 @@ from ..tokenizer_exceptions import BASE_EXCEPTIONS -from ...symbols import ORTH, LEMMA +from ...symbols import ORTH, NORM from ...util import update_exc @@ -8,41 +8,41 @@ _exc = {} # Time for exc_data in [ - {LEMMA: "قبل الميلاد", ORTH: "ق.م"}, - {LEMMA: "بعد الميلاد", ORTH: "ب. م"}, - {LEMMA: "ميلادي", ORTH: ".م"}, - {LEMMA: "هجري", ORTH: ".هـ"}, - {LEMMA: "توفي", ORTH: ".ت"}, + {NORM: "قبل الميلاد", ORTH: "ق.م"}, + {NORM: "بعد الميلاد", ORTH: "ب. م"}, + {NORM: "ميلادي", ORTH: ".م"}, + {NORM: "هجري", ORTH: ".هـ"}, + {NORM: "توفي", ORTH: ".ت"}, ]: _exc[exc_data[ORTH]] = [exc_data] # Scientific abv. for exc_data in [ - {LEMMA: "صلى الله عليه وسلم", ORTH: "صلعم"}, - {LEMMA: "الشارح", ORTH: "الشـ"}, - {LEMMA: "الظاهر", ORTH: "الظـ"}, - {LEMMA: "أيضًا", ORTH: "أيضـ"}, - {LEMMA: "إلى آخره", ORTH: "إلخ"}, - {LEMMA: "انتهى", ORTH: "اهـ"}, - {LEMMA: "حدّثنا", ORTH: "ثنا"}, - {LEMMA: "حدثني", ORTH: "ثنى"}, - {LEMMA: "أنبأنا", ORTH: "أنا"}, - {LEMMA: "أخبرنا", ORTH: "نا"}, - {LEMMA: "مصدر سابق", ORTH: "م. س"}, - {LEMMA: "مصدر نفسه", ORTH: "م. ن"}, + {NORM: "صلى الله عليه وسلم", ORTH: "صلعم"}, + {NORM: "الشارح", ORTH: "الشـ"}, + {NORM: "الظاهر", ORTH: "الظـ"}, + {NORM: "أيضًا", ORTH: "أيضـ"}, + {NORM: "إلى آخره", ORTH: "إلخ"}, + {NORM: "انتهى", ORTH: "اهـ"}, + {NORM: "حدّثنا", ORTH: "ثنا"}, + {NORM: "حدثني", ORTH: "ثنى"}, + {NORM: "أنبأنا", ORTH: "أنا"}, + {NORM: "أخبرنا", ORTH: "نا"}, + {NORM: "مصدر سابق", ORTH: "م. س"}, + {NORM: "مصدر نفسه", ORTH: "م. ن"}, ]: _exc[exc_data[ORTH]] = [exc_data] # Other abv. 
for exc_data in [ - {LEMMA: "دكتور", ORTH: "د."}, - {LEMMA: "أستاذ دكتور", ORTH: "أ.د"}, - {LEMMA: "أستاذ", ORTH: "أ."}, - {LEMMA: "بروفيسور", ORTH: "ب."}, + {NORM: "دكتور", ORTH: "د."}, + {NORM: "أستاذ دكتور", ORTH: "أ.د"}, + {NORM: "أستاذ", ORTH: "أ."}, + {NORM: "بروفيسور", ORTH: "ب."}, ]: _exc[exc_data[ORTH]] = [exc_data] -for exc_data in [{LEMMA: "تلفون", ORTH: "ت."}, {LEMMA: "صندوق بريد", ORTH: "ص.ب"}]: +for exc_data in [{NORM: "تلفون", ORTH: "ت."}, {NORM: "صندوق بريد", ORTH: "ص.ب"}]: _exc[exc_data[ORTH]] = [exc_data] TOKENIZER_EXCEPTIONS = update_exc(BASE_EXCEPTIONS, _exc) diff --git a/spacy/lang/bn/tokenizer_exceptions.py b/spacy/lang/bn/tokenizer_exceptions.py index d896b4914..e666522b8 100644 --- a/spacy/lang/bn/tokenizer_exceptions.py +++ b/spacy/lang/bn/tokenizer_exceptions.py @@ -1,5 +1,5 @@ from ..tokenizer_exceptions import BASE_EXCEPTIONS -from ...symbols import ORTH, LEMMA +from ...symbols import ORTH, NORM from ...util import update_exc @@ -7,18 +7,18 @@ _exc = {} for exc_data in [ - {ORTH: "ডঃ", LEMMA: "ডক্টর"}, - {ORTH: "ডাঃ", LEMMA: "ডাক্তার"}, - {ORTH: "ড.", LEMMA: "ডক্টর"}, - {ORTH: "ডা.", LEMMA: "ডাক্তার"}, - {ORTH: "মোঃ", LEMMA: "মোহাম্মদ"}, - {ORTH: "মো.", LEMMA: "মোহাম্মদ"}, - {ORTH: "সে.", LEMMA: "সেলসিয়াস"}, - {ORTH: "কি.মি.", LEMMA: "কিলোমিটার"}, - {ORTH: "কি.মি", LEMMA: "কিলোমিটার"}, - {ORTH: "সে.মি.", LEMMA: "সেন্টিমিটার"}, - {ORTH: "সে.মি", LEMMA: "সেন্টিমিটার"}, - {ORTH: "মি.লি.", LEMMA: "মিলিলিটার"}, + {ORTH: "ডঃ", NORM: "ডক্টর"}, + {ORTH: "ডাঃ", NORM: "ডাক্তার"}, + {ORTH: "ড.", NORM: "ডক্টর"}, + {ORTH: "ডা.", NORM: "ডাক্তার"}, + {ORTH: "মোঃ", NORM: "মোহাম্মদ"}, + {ORTH: "মো.", NORM: "মোহাম্মদ"}, + {ORTH: "সে.", NORM: "সেলসিয়াস"}, + {ORTH: "কি.মি.", NORM: "কিলোমিটার"}, + {ORTH: "কি.মি", NORM: "কিলোমিটার"}, + {ORTH: "সে.মি.", NORM: "সেন্টিমিটার"}, + {ORTH: "সে.মি", NORM: "সেন্টিমিটার"}, + {ORTH: "মি.লি.", NORM: "মিলিলিটার"}, ]: _exc[exc_data[ORTH]] = [exc_data] diff --git a/spacy/lang/ca/tokenizer_exceptions.py 
b/spacy/lang/ca/tokenizer_exceptions.py index 6928de46b..b465e97ba 100644 --- a/spacy/lang/ca/tokenizer_exceptions.py +++ b/spacy/lang/ca/tokenizer_exceptions.py @@ -1,40 +1,40 @@ from ..tokenizer_exceptions import BASE_EXCEPTIONS -from ...symbols import ORTH, LEMMA +from ...symbols import ORTH, NORM from ...util import update_exc _exc = {} for exc_data in [ - {ORTH: "aprox.", LEMMA: "aproximadament"}, - {ORTH: "pàg.", LEMMA: "pàgina"}, - {ORTH: "p.ex.", LEMMA: "per exemple"}, - {ORTH: "gen.", LEMMA: "gener"}, - {ORTH: "feb.", LEMMA: "febrer"}, - {ORTH: "abr.", LEMMA: "abril"}, - {ORTH: "jul.", LEMMA: "juliol"}, - {ORTH: "set.", LEMMA: "setembre"}, - {ORTH: "oct.", LEMMA: "octubre"}, - {ORTH: "nov.", LEMMA: "novembre"}, - {ORTH: "dec.", LEMMA: "desembre"}, - {ORTH: "Dr.", LEMMA: "doctor"}, - {ORTH: "Sr.", LEMMA: "senyor"}, - {ORTH: "Sra.", LEMMA: "senyora"}, - {ORTH: "Srta.", LEMMA: "senyoreta"}, - {ORTH: "núm", LEMMA: "número"}, - {ORTH: "St.", LEMMA: "sant"}, - {ORTH: "Sta.", LEMMA: "santa"}, + {ORTH: "aprox.", NORM: "aproximadament"}, + {ORTH: "pàg.", NORM: "pàgina"}, + {ORTH: "p.ex.", NORM: "per exemple"}, + {ORTH: "gen.", NORM: "gener"}, + {ORTH: "feb.", NORM: "febrer"}, + {ORTH: "abr.", NORM: "abril"}, + {ORTH: "jul.", NORM: "juliol"}, + {ORTH: "set.", NORM: "setembre"}, + {ORTH: "oct.", NORM: "octubre"}, + {ORTH: "nov.", NORM: "novembre"}, + {ORTH: "dec.", NORM: "desembre"}, + {ORTH: "Dr.", NORM: "doctor"}, + {ORTH: "Sr.", NORM: "senyor"}, + {ORTH: "Sra.", NORM: "senyora"}, + {ORTH: "Srta.", NORM: "senyoreta"}, + {ORTH: "núm", NORM: "número"}, + {ORTH: "St.", NORM: "sant"}, + {ORTH: "Sta.", NORM: "santa"}, ]: _exc[exc_data[ORTH]] = [exc_data] # Times -_exc["12m."] = [{ORTH: "12"}, {ORTH: "m.", LEMMA: "p.m."}] +_exc["12m."] = [{ORTH: "12"}, {ORTH: "m.", NORM: "p.m."}] for h in range(1, 12 + 1): for period in ["a.m.", "am"]: - _exc[f"{h}{period}"] = [{ORTH: f"{h}"}, {ORTH: period, LEMMA: "a.m."}] + _exc[f"{h}{period}"] = [{ORTH: f"{h}"}, {ORTH: period, NORM: 
"a.m."}] for period in ["p.m.", "pm"]: - _exc[f"{h}{period}"] = [{ORTH: f"{h}"}, {ORTH: period, LEMMA: "p.m."}] + _exc[f"{h}{period}"] = [{ORTH: f"{h}"}, {ORTH: period, NORM: "p.m."}] TOKENIZER_EXCEPTIONS = update_exc(BASE_EXCEPTIONS, _exc) diff --git a/spacy/lang/da/tokenizer_exceptions.py b/spacy/lang/da/tokenizer_exceptions.py index 826a6077b..ce25c546b 100644 --- a/spacy/lang/da/tokenizer_exceptions.py +++ b/spacy/lang/da/tokenizer_exceptions.py @@ -3,7 +3,7 @@ Tokenizer Exceptions. Source: https://forkortelse.dk/ and various others. """ from ..tokenizer_exceptions import BASE_EXCEPTIONS -from ...symbols import ORTH, LEMMA, NORM +from ...symbols import ORTH, NORM from ...util import update_exc @@ -13,44 +13,44 @@ _exc = {} # (for "torsdag") are left out because they are ambiguous. The same is the case # for abbreviations "jul." and "Jul." ("juli"). for exc_data in [ - {ORTH: "Kbh.", LEMMA: "København", NORM: "København"}, - {ORTH: "jan.", LEMMA: "januar"}, - {ORTH: "febr.", LEMMA: "februar"}, - {ORTH: "feb.", LEMMA: "februar"}, - {ORTH: "mar.", LEMMA: "marts"}, - {ORTH: "apr.", LEMMA: "april"}, - {ORTH: "jun.", LEMMA: "juni"}, - {ORTH: "aug.", LEMMA: "august"}, - {ORTH: "sept.", LEMMA: "september"}, - {ORTH: "sep.", LEMMA: "september"}, - {ORTH: "okt.", LEMMA: "oktober"}, - {ORTH: "nov.", LEMMA: "november"}, - {ORTH: "dec.", LEMMA: "december"}, - {ORTH: "man.", LEMMA: "mandag"}, - {ORTH: "tirs.", LEMMA: "tirsdag"}, - {ORTH: "ons.", LEMMA: "onsdag"}, - {ORTH: "tor.", LEMMA: "torsdag"}, - {ORTH: "tors.", LEMMA: "torsdag"}, - {ORTH: "fre.", LEMMA: "fredag"}, - {ORTH: "lør.", LEMMA: "lørdag"}, - {ORTH: "Jan.", LEMMA: "januar"}, - {ORTH: "Febr.", LEMMA: "februar"}, - {ORTH: "Feb.", LEMMA: "februar"}, - {ORTH: "Mar.", LEMMA: "marts"}, - {ORTH: "Apr.", LEMMA: "april"}, - {ORTH: "Jun.", LEMMA: "juni"}, - {ORTH: "Aug.", LEMMA: "august"}, - {ORTH: "Sept.", LEMMA: "september"}, - {ORTH: "Sep.", LEMMA: "september"}, - {ORTH: "Okt.", LEMMA: "oktober"}, - {ORTH: "Nov.", 
LEMMA: "november"}, - {ORTH: "Dec.", LEMMA: "december"}, - {ORTH: "Man.", LEMMA: "mandag"}, - {ORTH: "Tirs.", LEMMA: "tirsdag"}, - {ORTH: "Ons.", LEMMA: "onsdag"}, - {ORTH: "Fre.", LEMMA: "fredag"}, - {ORTH: "Lør.", LEMMA: "lørdag"}, - {ORTH: "og/eller", LEMMA: "og/eller", NORM: "og/eller"}, + {ORTH: "Kbh.", NORM: "København"}, + {ORTH: "jan.", NORM: "januar"}, + {ORTH: "febr.", NORM: "februar"}, + {ORTH: "feb.", NORM: "februar"}, + {ORTH: "mar.", NORM: "marts"}, + {ORTH: "apr.", NORM: "april"}, + {ORTH: "jun.", NORM: "juni"}, + {ORTH: "aug.", NORM: "august"}, + {ORTH: "sept.", NORM: "september"}, + {ORTH: "sep.", NORM: "september"}, + {ORTH: "okt.", NORM: "oktober"}, + {ORTH: "nov.", NORM: "november"}, + {ORTH: "dec.", NORM: "december"}, + {ORTH: "man.", NORM: "mandag"}, + {ORTH: "tirs.", NORM: "tirsdag"}, + {ORTH: "ons.", NORM: "onsdag"}, + {ORTH: "tor.", NORM: "torsdag"}, + {ORTH: "tors.", NORM: "torsdag"}, + {ORTH: "fre.", NORM: "fredag"}, + {ORTH: "lør.", NORM: "lørdag"}, + {ORTH: "Jan.", NORM: "januar"}, + {ORTH: "Febr.", NORM: "februar"}, + {ORTH: "Feb.", NORM: "februar"}, + {ORTH: "Mar.", NORM: "marts"}, + {ORTH: "Apr.", NORM: "april"}, + {ORTH: "Jun.", NORM: "juni"}, + {ORTH: "Aug.", NORM: "august"}, + {ORTH: "Sept.", NORM: "september"}, + {ORTH: "Sep.", NORM: "september"}, + {ORTH: "Okt.", NORM: "oktober"}, + {ORTH: "Nov.", NORM: "november"}, + {ORTH: "Dec.", NORM: "december"}, + {ORTH: "Man.", NORM: "mandag"}, + {ORTH: "Tirs.", NORM: "tirsdag"}, + {ORTH: "Ons.", NORM: "onsdag"}, + {ORTH: "Fre.", NORM: "fredag"}, + {ORTH: "Lør.", NORM: "lørdag"}, + {ORTH: "og/eller", NORM: "og/eller"}, ]: _exc[exc_data[ORTH]] = [exc_data] @@ -550,22 +550,22 @@ for orth in [ _exc[capitalized] = [{ORTH: capitalized}] for exc_data in [ - {ORTH: "s'gu", LEMMA: "s'gu", NORM: "s'gu"}, - {ORTH: "S'gu", LEMMA: "s'gu", NORM: "s'gu"}, - {ORTH: "sgu'", LEMMA: "s'gu", NORM: "s'gu"}, - {ORTH: "Sgu'", LEMMA: "s'gu", NORM: "s'gu"}, - {ORTH: "sku'", LEMMA: "skal", NORM: "skulle"}, - 
{ORTH: "ku'", LEMMA: "kan", NORM: "kunne"}, - {ORTH: "Ku'", LEMMA: "kan", NORM: "kunne"}, - {ORTH: "ka'", LEMMA: "kan", NORM: "kan"}, - {ORTH: "Ka'", LEMMA: "kan", NORM: "kan"}, - {ORTH: "gi'", LEMMA: "give", NORM: "giv"}, - {ORTH: "Gi'", LEMMA: "give", NORM: "giv"}, - {ORTH: "li'", LEMMA: "lide", NORM: "lide"}, - {ORTH: "ha'", LEMMA: "have", NORM: "have"}, - {ORTH: "Ha'", LEMMA: "have", NORM: "have"}, - {ORTH: "ik'", LEMMA: "ikke", NORM: "ikke"}, - {ORTH: "Ik'", LEMMA: "ikke", NORM: "ikke"}, + {ORTH: "s'gu", NORM: "s'gu"}, + {ORTH: "S'gu", NORM: "s'gu"}, + {ORTH: "sgu'", NORM: "s'gu"}, + {ORTH: "Sgu'", NORM: "s'gu"}, + {ORTH: "sku'", NORM: "skulle"}, + {ORTH: "ku'", NORM: "kunne"}, + {ORTH: "Ku'", NORM: "kunne"}, + {ORTH: "ka'", NORM: "kan"}, + {ORTH: "Ka'", NORM: "kan"}, + {ORTH: "gi'", NORM: "giv"}, + {ORTH: "Gi'", NORM: "giv"}, + {ORTH: "li'", NORM: "lide"}, + {ORTH: "ha'", NORM: "have"}, + {ORTH: "Ha'", NORM: "have"}, + {ORTH: "ik'", NORM: "ikke"}, + {ORTH: "Ik'", NORM: "ikke"}, ]: _exc[exc_data[ORTH]] = [exc_data] @@ -575,7 +575,7 @@ for h in range(1, 31 + 1): for period in ["."]: _exc[f"{h}{period}"] = [{ORTH: f"{h}."}] -_custom_base_exc = {"i.": [{ORTH: "i", LEMMA: "i", NORM: "i"}, {ORTH: "."}]} +_custom_base_exc = {"i.": [{ORTH: "i", NORM: "i"}, {ORTH: "."}]} _exc.update(_custom_base_exc) TOKENIZER_EXCEPTIONS = update_exc(BASE_EXCEPTIONS, _exc) diff --git a/spacy/lang/de/tokenizer_exceptions.py b/spacy/lang/de/tokenizer_exceptions.py index d7860ace6..21d99cffe 100644 --- a/spacy/lang/de/tokenizer_exceptions.py +++ b/spacy/lang/de/tokenizer_exceptions.py @@ -1,159 +1,135 @@ from ..tokenizer_exceptions import BASE_EXCEPTIONS -from ...symbols import ORTH, LEMMA, TAG, NORM, PRON_LEMMA +from ...symbols import ORTH, NORM from ...util import update_exc _exc = { - "auf'm": [{ORTH: "auf", LEMMA: "auf"}, {ORTH: "'m", LEMMA: "der", NORM: "dem"}], - "du's": [ - {ORTH: "du", LEMMA: PRON_LEMMA, TAG: "PPER"}, - {ORTH: "'s", LEMMA: PRON_LEMMA, TAG: "PPER", NORM: "es"}, - 
], - "er's": [ - {ORTH: "er", LEMMA: PRON_LEMMA, TAG: "PPER"}, - {ORTH: "'s", LEMMA: PRON_LEMMA, TAG: "PPER", NORM: "es"}, - ], - "hinter'm": [ - {ORTH: "hinter", LEMMA: "hinter"}, - {ORTH: "'m", LEMMA: "der", NORM: "dem"}, - ], - "ich's": [ - {ORTH: "ich", LEMMA: PRON_LEMMA, TAG: "PPER"}, - {ORTH: "'s", LEMMA: PRON_LEMMA, TAG: "PPER", NORM: "es"}, - ], - "ihr's": [ - {ORTH: "ihr", LEMMA: PRON_LEMMA, TAG: "PPER"}, - {ORTH: "'s", LEMMA: PRON_LEMMA, TAG: "PPER", NORM: "es"}, - ], - "sie's": [ - {ORTH: "sie", LEMMA: PRON_LEMMA, TAG: "PPER"}, - {ORTH: "'s", LEMMA: PRON_LEMMA, TAG: "PPER", NORM: "es"}, - ], - "unter'm": [ - {ORTH: "unter", LEMMA: "unter"}, - {ORTH: "'m", LEMMA: "der", NORM: "dem"}, - ], - "vor'm": [{ORTH: "vor", LEMMA: "vor"}, {ORTH: "'m", LEMMA: "der", NORM: "dem"}], - "wir's": [ - {ORTH: "wir", LEMMA: PRON_LEMMA, TAG: "PPER"}, - {ORTH: "'s", LEMMA: PRON_LEMMA, TAG: "PPER", NORM: "es"}, - ], - "über'm": [{ORTH: "über", LEMMA: "über"}, {ORTH: "'m", LEMMA: "der", NORM: "dem"}], + "auf'm": [{ORTH: "auf"}, {ORTH: "'m", NORM: "dem"}], + "du's": [{ORTH: "du"}, {ORTH: "'s", NORM: "es"}], + "er's": [{ORTH: "er"}, {ORTH: "'s", NORM: "es"}], + "hinter'm": [{ORTH: "hinter"}, {ORTH: "'m", NORM: "dem"}], + "ich's": [{ORTH: "ich"}, {ORTH: "'s", NORM: "es"}], + "ihr's": [{ORTH: "ihr"}, {ORTH: "'s", NORM: "es"}], + "sie's": [{ORTH: "sie"}, {ORTH: "'s", NORM: "es"}], + "unter'm": [{ORTH: "unter"}, {ORTH: "'m", NORM: "dem"}], + "vor'm": [{ORTH: "vor"}, {ORTH: "'m", NORM: "dem"}], + "wir's": [{ORTH: "wir"}, {ORTH: "'s", NORM: "es"}], + "über'm": [{ORTH: "über"}, {ORTH: "'m", NORM: "dem"}], } for exc_data in [ - {ORTH: "'S", LEMMA: PRON_LEMMA, NORM: "'s", TAG: "PPER"}, - {ORTH: "'s", LEMMA: PRON_LEMMA, NORM: "'s", TAG: "PPER"}, - {ORTH: "S'", LEMMA: PRON_LEMMA, NORM: "'s", TAG: "PPER"}, - {ORTH: "s'", LEMMA: PRON_LEMMA, NORM: "'s", TAG: "PPER"}, - {ORTH: "'n", LEMMA: "ein", NORM: "ein"}, - {ORTH: "'ne", LEMMA: "eine", NORM: "eine"}, - {ORTH: "'nen", LEMMA: "ein", NORM: 
"einen"}, - {ORTH: "'nem", LEMMA: "ein", NORM: "einem"}, - {ORTH: "Abb.", LEMMA: "Abbildung", NORM: "Abbildung"}, - {ORTH: "Abk.", LEMMA: "Abkürzung", NORM: "Abkürzung"}, - {ORTH: "Abt.", LEMMA: "Abteilung", NORM: "Abteilung"}, - {ORTH: "Apr.", LEMMA: "April", NORM: "April"}, - {ORTH: "Aug.", LEMMA: "August", NORM: "August"}, - {ORTH: "Bd.", LEMMA: "Band", NORM: "Band"}, - {ORTH: "Betr.", LEMMA: "Betreff", NORM: "Betreff"}, - {ORTH: "Bf.", LEMMA: "Bahnhof", NORM: "Bahnhof"}, - {ORTH: "Bhf.", LEMMA: "Bahnhof", NORM: "Bahnhof"}, - {ORTH: "Bsp.", LEMMA: "Beispiel", NORM: "Beispiel"}, - {ORTH: "Dez.", LEMMA: "Dezember", NORM: "Dezember"}, - {ORTH: "Di.", LEMMA: "Dienstag", NORM: "Dienstag"}, - {ORTH: "Do.", LEMMA: "Donnerstag", NORM: "Donnerstag"}, - {ORTH: "Fa.", LEMMA: "Firma", NORM: "Firma"}, - {ORTH: "Fam.", LEMMA: "Familie", NORM: "Familie"}, - {ORTH: "Feb.", LEMMA: "Februar", NORM: "Februar"}, - {ORTH: "Fr.", LEMMA: "Frau", NORM: "Frau"}, - {ORTH: "Frl.", LEMMA: "Fräulein", NORM: "Fräulein"}, - {ORTH: "Hbf.", LEMMA: "Hauptbahnhof", NORM: "Hauptbahnhof"}, - {ORTH: "Hr.", LEMMA: "Herr", NORM: "Herr"}, - {ORTH: "Hrn.", LEMMA: "Herr", NORM: "Herrn"}, - {ORTH: "Jan.", LEMMA: "Januar", NORM: "Januar"}, - {ORTH: "Jh.", LEMMA: "Jahrhundert", NORM: "Jahrhundert"}, - {ORTH: "Jhd.", LEMMA: "Jahrhundert", NORM: "Jahrhundert"}, - {ORTH: "Jul.", LEMMA: "Juli", NORM: "Juli"}, - {ORTH: "Jun.", LEMMA: "Juni", NORM: "Juni"}, - {ORTH: "Mi.", LEMMA: "Mittwoch", NORM: "Mittwoch"}, - {ORTH: "Mio.", LEMMA: "Million", NORM: "Million"}, - {ORTH: "Mo.", LEMMA: "Montag", NORM: "Montag"}, - {ORTH: "Mrd.", LEMMA: "Milliarde", NORM: "Milliarde"}, - {ORTH: "Mrz.", LEMMA: "März", NORM: "März"}, - {ORTH: "MwSt.", LEMMA: "Mehrwertsteuer", NORM: "Mehrwertsteuer"}, - {ORTH: "Mär.", LEMMA: "März", NORM: "März"}, - {ORTH: "Nov.", LEMMA: "November", NORM: "November"}, - {ORTH: "Nr.", LEMMA: "Nummer", NORM: "Nummer"}, - {ORTH: "Okt.", LEMMA: "Oktober", NORM: "Oktober"}, - {ORTH: "Orig.", LEMMA: 
"Original", NORM: "Original"}, - {ORTH: "Pkt.", LEMMA: "Punkt", NORM: "Punkt"}, - {ORTH: "Prof.", LEMMA: "Professor", NORM: "Professor"}, - {ORTH: "Red.", LEMMA: "Redaktion", NORM: "Redaktion"}, - {ORTH: "Sa.", LEMMA: "Samstag", NORM: "Samstag"}, - {ORTH: "Sep.", LEMMA: "September", NORM: "September"}, - {ORTH: "Sept.", LEMMA: "September", NORM: "September"}, - {ORTH: "So.", LEMMA: "Sonntag", NORM: "Sonntag"}, - {ORTH: "Std.", LEMMA: "Stunde", NORM: "Stunde"}, - {ORTH: "Str.", LEMMA: "Straße", NORM: "Straße"}, - {ORTH: "Tel.", LEMMA: "Telefon", NORM: "Telefon"}, - {ORTH: "Tsd.", LEMMA: "Tausend", NORM: "Tausend"}, - {ORTH: "Univ.", LEMMA: "Universität", NORM: "Universität"}, - {ORTH: "abzgl.", LEMMA: "abzüglich", NORM: "abzüglich"}, - {ORTH: "allg.", LEMMA: "allgemein", NORM: "allgemein"}, - {ORTH: "bspw.", LEMMA: "beispielsweise", NORM: "beispielsweise"}, - {ORTH: "bzgl.", LEMMA: "bezüglich", NORM: "bezüglich"}, - {ORTH: "bzw.", LEMMA: "beziehungsweise", NORM: "beziehungsweise"}, - {ORTH: "d.h.", LEMMA: "das heißt"}, - {ORTH: "dgl.", LEMMA: "dergleichen", NORM: "dergleichen"}, - {ORTH: "ebd.", LEMMA: "ebenda", NORM: "ebenda"}, - {ORTH: "eigtl.", LEMMA: "eigentlich", NORM: "eigentlich"}, - {ORTH: "engl.", LEMMA: "englisch", NORM: "englisch"}, - {ORTH: "evtl.", LEMMA: "eventuell", NORM: "eventuell"}, - {ORTH: "frz.", LEMMA: "französisch", NORM: "französisch"}, - {ORTH: "gegr.", LEMMA: "gegründet", NORM: "gegründet"}, - {ORTH: "ggf.", LEMMA: "gegebenenfalls", NORM: "gegebenenfalls"}, - {ORTH: "ggfs.", LEMMA: "gegebenenfalls", NORM: "gegebenenfalls"}, - {ORTH: "ggü.", LEMMA: "gegenüber", NORM: "gegenüber"}, - {ORTH: "i.O.", LEMMA: "in Ordnung"}, - {ORTH: "i.d.R.", LEMMA: "in der Regel"}, - {ORTH: "incl.", LEMMA: "inklusive", NORM: "inklusive"}, - {ORTH: "inkl.", LEMMA: "inklusive", NORM: "inklusive"}, - {ORTH: "insb.", LEMMA: "insbesondere", NORM: "insbesondere"}, - {ORTH: "kath.", LEMMA: "katholisch", NORM: "katholisch"}, - {ORTH: "lt.", LEMMA: "laut", NORM: "laut"}, 
- {ORTH: "max.", LEMMA: "maximal", NORM: "maximal"}, - {ORTH: "min.", LEMMA: "minimal", NORM: "minimal"}, - {ORTH: "mind.", LEMMA: "mindestens", NORM: "mindestens"}, - {ORTH: "mtl.", LEMMA: "monatlich", NORM: "monatlich"}, - {ORTH: "n.Chr.", LEMMA: "nach Christus"}, - {ORTH: "orig.", LEMMA: "original", NORM: "original"}, - {ORTH: "röm.", LEMMA: "römisch", NORM: "römisch"}, - {ORTH: "s.o.", LEMMA: "siehe oben"}, - {ORTH: "sog.", LEMMA: "so genannt"}, - {ORTH: "stellv.", LEMMA: "stellvertretend"}, - {ORTH: "tägl.", LEMMA: "täglich", NORM: "täglich"}, - {ORTH: "u.U.", LEMMA: "unter Umständen"}, - {ORTH: "u.s.w.", LEMMA: "und so weiter"}, - {ORTH: "u.v.m.", LEMMA: "und vieles mehr"}, - {ORTH: "usf.", LEMMA: "und so fort"}, - {ORTH: "usw.", LEMMA: "und so weiter"}, - {ORTH: "uvm.", LEMMA: "und vieles mehr"}, - {ORTH: "v.Chr.", LEMMA: "vor Christus"}, - {ORTH: "v.a.", LEMMA: "vor allem"}, - {ORTH: "v.l.n.r.", LEMMA: "von links nach rechts"}, - {ORTH: "vgl.", LEMMA: "vergleiche", NORM: "vergleiche"}, - {ORTH: "vllt.", LEMMA: "vielleicht", NORM: "vielleicht"}, - {ORTH: "vlt.", LEMMA: "vielleicht", NORM: "vielleicht"}, - {ORTH: "z.B.", LEMMA: "zum Beispiel"}, - {ORTH: "z.Bsp.", LEMMA: "zum Beispiel"}, - {ORTH: "z.T.", LEMMA: "zum Teil"}, - {ORTH: "z.Z.", LEMMA: "zur Zeit"}, - {ORTH: "z.Zt.", LEMMA: "zur Zeit"}, - {ORTH: "z.b.", LEMMA: "zum Beispiel"}, - {ORTH: "zzgl.", LEMMA: "zuzüglich"}, - {ORTH: "österr.", LEMMA: "österreichisch", NORM: "österreichisch"}, + {ORTH: "'S", NORM: "'s"}, + {ORTH: "'s", NORM: "'s"}, + {ORTH: "S'", NORM: "'s"}, + {ORTH: "s'", NORM: "'s"}, + {ORTH: "'n", NORM: "ein"}, + {ORTH: "'ne", NORM: "eine"}, + {ORTH: "'nen", NORM: "einen"}, + {ORTH: "'nem", NORM: "einem"}, + {ORTH: "Abb.", NORM: "Abbildung"}, + {ORTH: "Abk.", NORM: "Abkürzung"}, + {ORTH: "Abt.", NORM: "Abteilung"}, + {ORTH: "Apr.", NORM: "April"}, + {ORTH: "Aug.", NORM: "August"}, + {ORTH: "Bd.", NORM: "Band"}, + {ORTH: "Betr.", NORM: "Betreff"}, + {ORTH: "Bf.", NORM: "Bahnhof"}, + {ORTH: 
"Bhf.", NORM: "Bahnhof"}, + {ORTH: "Bsp.", NORM: "Beispiel"}, + {ORTH: "Dez.", NORM: "Dezember"}, + {ORTH: "Di.", NORM: "Dienstag"}, + {ORTH: "Do.", NORM: "Donnerstag"}, + {ORTH: "Fa.", NORM: "Firma"}, + {ORTH: "Fam.", NORM: "Familie"}, + {ORTH: "Feb.", NORM: "Februar"}, + {ORTH: "Fr.", NORM: "Frau"}, + {ORTH: "Frl.", NORM: "Fräulein"}, + {ORTH: "Hbf.", NORM: "Hauptbahnhof"}, + {ORTH: "Hr.", NORM: "Herr"}, + {ORTH: "Hrn.", NORM: "Herrn"}, + {ORTH: "Jan.", NORM: "Januar"}, + {ORTH: "Jh.", NORM: "Jahrhundert"}, + {ORTH: "Jhd.", NORM: "Jahrhundert"}, + {ORTH: "Jul.", NORM: "Juli"}, + {ORTH: "Jun.", NORM: "Juni"}, + {ORTH: "Mi.", NORM: "Mittwoch"}, + {ORTH: "Mio.", NORM: "Million"}, + {ORTH: "Mo.", NORM: "Montag"}, + {ORTH: "Mrd.", NORM: "Milliarde"}, + {ORTH: "Mrz.", NORM: "März"}, + {ORTH: "MwSt.", NORM: "Mehrwertsteuer"}, + {ORTH: "Mär.", NORM: "März"}, + {ORTH: "Nov.", NORM: "November"}, + {ORTH: "Nr.", NORM: "Nummer"}, + {ORTH: "Okt.", NORM: "Oktober"}, + {ORTH: "Orig.", NORM: "Original"}, + {ORTH: "Pkt.", NORM: "Punkt"}, + {ORTH: "Prof.", NORM: "Professor"}, + {ORTH: "Red.", NORM: "Redaktion"}, + {ORTH: "Sa.", NORM: "Samstag"}, + {ORTH: "Sep.", NORM: "September"}, + {ORTH: "Sept.", NORM: "September"}, + {ORTH: "So.", NORM: "Sonntag"}, + {ORTH: "Std.", NORM: "Stunde"}, + {ORTH: "Str.", NORM: "Straße"}, + {ORTH: "Tel.", NORM: "Telefon"}, + {ORTH: "Tsd.", NORM: "Tausend"}, + {ORTH: "Univ.", NORM: "Universität"}, + {ORTH: "abzgl.", NORM: "abzüglich"}, + {ORTH: "allg.", NORM: "allgemein"}, + {ORTH: "bspw.", NORM: "beispielsweise"}, + {ORTH: "bzgl.", NORM: "bezüglich"}, + {ORTH: "bzw.", NORM: "beziehungsweise"}, + {ORTH: "d.h."}, + {ORTH: "dgl.", NORM: "dergleichen"}, + {ORTH: "ebd.", NORM: "ebenda"}, + {ORTH: "eigtl.", NORM: "eigentlich"}, + {ORTH: "engl.", NORM: "englisch"}, + {ORTH: "evtl.", NORM: "eventuell"}, + {ORTH: "frz.", NORM: "französisch"}, + {ORTH: "gegr.", NORM: "gegründet"}, + {ORTH: "ggf.", NORM: "gegebenenfalls"}, + {ORTH: "ggfs.", NORM: 
"gegebenenfalls"}, + {ORTH: "ggü.", NORM: "gegenüber"}, + {ORTH: "i.O."}, + {ORTH: "i.d.R."}, + {ORTH: "incl.", NORM: "inklusive"}, + {ORTH: "inkl.", NORM: "inklusive"}, + {ORTH: "insb.", NORM: "insbesondere"}, + {ORTH: "kath.", NORM: "katholisch"}, + {ORTH: "lt.", NORM: "laut"}, + {ORTH: "max.", NORM: "maximal"}, + {ORTH: "min.", NORM: "minimal"}, + {ORTH: "mind.", NORM: "mindestens"}, + {ORTH: "mtl.", NORM: "monatlich"}, + {ORTH: "n.Chr."}, + {ORTH: "orig.", NORM: "original"}, + {ORTH: "röm.", NORM: "römisch"}, + {ORTH: "s.o."}, + {ORTH: "sog."}, + {ORTH: "stellv."}, + {ORTH: "tägl.", NORM: "täglich"}, + {ORTH: "u.U."}, + {ORTH: "u.s.w."}, + {ORTH: "u.v.m."}, + {ORTH: "usf."}, + {ORTH: "usw."}, + {ORTH: "uvm."}, + {ORTH: "v.Chr."}, + {ORTH: "v.a."}, + {ORTH: "v.l.n.r."}, + {ORTH: "vgl.", NORM: "vergleiche"}, + {ORTH: "vllt.", NORM: "vielleicht"}, + {ORTH: "vlt.", NORM: "vielleicht"}, + {ORTH: "z.B."}, + {ORTH: "z.Bsp."}, + {ORTH: "z.T."}, + {ORTH: "z.Z."}, + {ORTH: "z.Zt."}, + {ORTH: "z.b."}, + {ORTH: "zzgl."}, + {ORTH: "österr.", NORM: "österreichisch"}, ]: _exc[exc_data[ORTH]] = [exc_data] diff --git a/spacy/lang/el/tokenizer_exceptions.py b/spacy/lang/el/tokenizer_exceptions.py index f9810828b..0a36d5d2b 100644 --- a/spacy/lang/el/tokenizer_exceptions.py +++ b/spacy/lang/el/tokenizer_exceptions.py @@ -1,130 +1,128 @@ from ..tokenizer_exceptions import BASE_EXCEPTIONS -from ...symbols import ORTH, LEMMA, NORM +from ...symbols import ORTH, NORM from ...util import update_exc _exc = {} for token in ["Απ'", "ΑΠ'", "αφ'", "Αφ'"]: - _exc[token] = [{ORTH: token, LEMMA: "από", NORM: "από"}] + _exc[token] = [{ORTH: token, NORM: "από"}] for token in ["Αλλ'", "αλλ'"]: - _exc[token] = [{ORTH: token, LEMMA: "αλλά", NORM: "αλλά"}] + _exc[token] = [{ORTH: token, NORM: "αλλά"}] for token in ["παρ'", "Παρ'", "ΠΑΡ'"]: - _exc[token] = [{ORTH: token, LEMMA: "παρά", NORM: "παρά"}] + _exc[token] = [{ORTH: token, NORM: "παρά"}] for token in ["καθ'", "Καθ'"]: - _exc[token] = [{ORTH: 
token, LEMMA: "κάθε", NORM: "κάθε"}] + _exc[token] = [{ORTH: token, NORM: "κάθε"}] for token in ["κατ'", "Κατ'"]: - _exc[token] = [{ORTH: token, LEMMA: "κατά", NORM: "κατά"}] + _exc[token] = [{ORTH: token, NORM: "κατά"}] for token in ["'ΣΟΥΝ", "'ναι", "'ταν", "'τανε", "'μαστε", "'μουνα", "'μουν"]: - _exc[token] = [{ORTH: token, LEMMA: "είμαι", NORM: "είμαι"}] + _exc[token] = [{ORTH: token, NORM: "είμαι"}] for token in ["Επ'", "επ'", "εφ'", "Εφ'"]: - _exc[token] = [{ORTH: token, LEMMA: "επί", NORM: "επί"}] + _exc[token] = [{ORTH: token, NORM: "επί"}] for token in ["Δι'", "δι'"]: - _exc[token] = [{ORTH: token, LEMMA: "δια", NORM: "δια"}] + _exc[token] = [{ORTH: token, NORM: "δια"}] for token in ["'χουν", "'χουμε", "'χαμε", "'χα", "'χε", "'χεις", "'χει"]: - _exc[token] = [{ORTH: token, LEMMA: "έχω", NORM: "έχω"}] + _exc[token] = [{ORTH: token, NORM: "έχω"}] for token in ["υπ'", "Υπ'"]: - _exc[token] = [{ORTH: token, LEMMA: "υπό", NORM: "υπό"}] + _exc[token] = [{ORTH: token, NORM: "υπό"}] for token in ["Μετ'", "ΜΕΤ'", "'μετ"]: - _exc[token] = [{ORTH: token, LEMMA: "μετά", NORM: "μετά"}] + _exc[token] = [{ORTH: token, NORM: "μετά"}] for token in ["Μ'", "μ'"]: - _exc[token] = [{ORTH: token, LEMMA: "με", NORM: "με"}] + _exc[token] = [{ORTH: token, NORM: "με"}] for token in ["Γι'", "ΓΙ'", "γι'"]: - _exc[token] = [{ORTH: token, LEMMA: "για", NORM: "για"}] + _exc[token] = [{ORTH: token, NORM: "για"}] for token in ["Σ'", "σ'"]: - _exc[token] = [{ORTH: token, LEMMA: "σε", NORM: "σε"}] + _exc[token] = [{ORTH: token, NORM: "σε"}] for token in ["Θ'", "θ'"]: - _exc[token] = [{ORTH: token, LEMMA: "θα", NORM: "θα"}] + _exc[token] = [{ORTH: token, NORM: "θα"}] for token in ["Ν'", "ν'"]: - _exc[token] = [{ORTH: token, LEMMA: "να", NORM: "να"}] + _exc[token] = [{ORTH: token, NORM: "να"}] for token in ["Τ'", "τ'"]: - _exc[token] = [{ORTH: token, LEMMA: "να", NORM: "να"}] + _exc[token] = [{ORTH: token, NORM: "να"}] for token in ["'γω", "'σένα", "'μεις"]: - _exc[token] = [{ORTH: token, 
LEMMA: "εγώ", NORM: "εγώ"}] + _exc[token] = [{ORTH: token, NORM: "εγώ"}] for token in ["Τ'", "τ'"]: - _exc[token] = [{ORTH: token, LEMMA: "το", NORM: "το"}] + _exc[token] = [{ORTH: token, NORM: "το"}] for token in ["Φέρ'", "Φερ'", "φέρ'", "φερ'"]: - _exc[token] = [{ORTH: token, LEMMA: "φέρνω", NORM: "φέρνω"}] + _exc[token] = [{ORTH: token, NORM: "φέρνω"}] for token in ["'ρθούνε", "'ρθουν", "'ρθει", "'ρθεί", "'ρθε", "'ρχεται"]: - _exc[token] = [{ORTH: token, LEMMA: "έρχομαι", NORM: "έρχομαι"}] + _exc[token] = [{ORTH: token, NORM: "έρχομαι"}] for token in ["'πανε", "'λεγε", "'λεγαν", "'πε", "'λεγα"]: - _exc[token] = [{ORTH: token, LEMMA: "λέγω", NORM: "λέγω"}] + _exc[token] = [{ORTH: token, NORM: "λέγω"}] for token in ["Πάρ'", "πάρ'"]: - _exc[token] = [{ORTH: token, LEMMA: "παίρνω", NORM: "παίρνω"}] + _exc[token] = [{ORTH: token, NORM: "παίρνω"}] for token in ["μέσ'", "Μέσ'", "μεσ'"]: - _exc[token] = [{ORTH: token, LEMMA: "μέσα", NORM: "μέσα"}] + _exc[token] = [{ORTH: token, NORM: "μέσα"}] for token in ["Δέσ'", "Δεσ'", "δεσ'"]: - _exc[token] = [{ORTH: token, LEMMA: "δένω", NORM: "δένω"}] + _exc[token] = [{ORTH: token, NORM: "δένω"}] for token in ["'κανε", "Κάν'"]: - _exc[token] = [{ORTH: token, LEMMA: "κάνω", NORM: "κάνω"}] + _exc[token] = [{ORTH: token, NORM: "κάνω"}] _other_exc = { - "κι": [{ORTH: "κι", LEMMA: "και", NORM: "και"}], - "Παίξ'": [{ORTH: "Παίξ'", LEMMA: "παίζω", NORM: "παίζω"}], - "Αντ'": [{ORTH: "Αντ'", LEMMA: "αντί", NORM: "αντί"}], - "ολ'": [{ORTH: "ολ'", LEMMA: "όλος", NORM: "όλος"}], - "ύστερ'": [{ORTH: "ύστερ'", LEMMA: "ύστερα", NORM: "ύστερα"}], - "'πρεπε": [{ORTH: "'πρεπε", LEMMA: "πρέπει", NORM: "πρέπει"}], - "Δύσκολ'": [{ORTH: "Δύσκολ'", LEMMA: "δύσκολος", NORM: "δύσκολος"}], - "'θελα": [{ORTH: "'θελα", LEMMA: "θέλω", NORM: "θέλω"}], - "'γραφα": [{ORTH: "'γραφα", LEMMA: "γράφω", NORM: "γράφω"}], - "'παιρνα": [{ORTH: "'παιρνα", LEMMA: "παίρνω", NORM: "παίρνω"}], - "'δειξε": [{ORTH: "'δειξε", LEMMA: "δείχνω", NORM: "δείχνω"}], - "όμουρφ'": 
[{ORTH: "όμουρφ'", LEMMA: "όμορφος", NORM: "όμορφος"}], - "κ'τσή": [{ORTH: "κ'τσή", LEMMA: "κουτσός", NORM: "κουτσός"}], - "μηδ'": [{ORTH: "μηδ'", LEMMA: "μήδε", NORM: "μήδε"}], - "'ξομολογήθηκε": [ - {ORTH: "'ξομολογήθηκε", LEMMA: "εξομολογούμαι", NORM: "εξομολογούμαι"} - ], - "'μας": [{ORTH: "'μας", LEMMA: "εμάς", NORM: "εμάς"}], - "'ξερες": [{ORTH: "'ξερες", LEMMA: "ξέρω", NORM: "ξέρω"}], - "έφθασ'": [{ORTH: "έφθασ'", LEMMA: "φθάνω", NORM: "φθάνω"}], - "εξ'": [{ORTH: "εξ'", LEMMA: "εκ", NORM: "εκ"}], - "δώσ'": [{ORTH: "δώσ'", LEMMA: "δίνω", NORM: "δίνω"}], - "τίποτ'": [{ORTH: "τίποτ'", LEMMA: "τίποτα", NORM: "τίποτα"}], - "Λήξ'": [{ORTH: "Λήξ'", LEMMA: "λήγω", NORM: "λήγω"}], - "άσ'": [{ORTH: "άσ'", LEMMA: "αφήνω", NORM: "αφήνω"}], - "Στ'": [{ORTH: "Στ'", LEMMA: "στο", NORM: "στο"}], - "Δωσ'": [{ORTH: "Δωσ'", LEMMA: "δίνω", NORM: "δίνω"}], - "Βάψ'": [{ORTH: "Βάψ'", LEMMA: "βάφω", NORM: "βάφω"}], - "Αλλ'": [{ORTH: "Αλλ'", LEMMA: "αλλά", NORM: "αλλά"}], - "Αμ'": [{ORTH: "Αμ'", LEMMA: "άμα", NORM: "άμα"}], - "Αγόρασ'": [{ORTH: "Αγόρασ'", LEMMA: "αγοράζω", NORM: "αγοράζω"}], - "'φύγε": [{ORTH: "'φύγε", LEMMA: "φεύγω", NORM: "φεύγω"}], - "'φερε": [{ORTH: "'φερε", LEMMA: "φέρνω", NORM: "φέρνω"}], - "'φαγε": [{ORTH: "'φαγε", LEMMA: "τρώω", NORM: "τρώω"}], - "'σπαγαν": [{ORTH: "'σπαγαν", LEMMA: "σπάω", NORM: "σπάω"}], - "'σκασε": [{ORTH: "'σκασε", LEMMA: "σκάω", NORM: "σκάω"}], - "'σβηνε": [{ORTH: "'σβηνε", LEMMA: "σβήνω", NORM: "σβήνω"}], - "'ριξε": [{ORTH: "'ριξε", LEMMA: "ρίχνω", NORM: "ρίχνω"}], - "'κλεβε": [{ORTH: "'κλεβε", LEMMA: "κλέβω", NORM: "κλέβω"}], - "'κει": [{ORTH: "'κει", LEMMA: "εκεί", NORM: "εκεί"}], - "'βλεπε": [{ORTH: "'βλεπε", LEMMA: "βλέπω", NORM: "βλέπω"}], - "'βγαινε": [{ORTH: "'βγαινε", LEMMA: "βγαίνω", NORM: "βγαίνω"}], + "κι": [{ORTH: "κι", NORM: "και"}], + "Παίξ'": [{ORTH: "Παίξ'", NORM: "παίζω"}], + "Αντ'": [{ORTH: "Αντ'", NORM: "αντί"}], + "ολ'": [{ORTH: "ολ'", NORM: "όλος"}], + "ύστερ'": [{ORTH: "ύστερ'", NORM: "ύστερα"}], + "'πρεπε": 
[{ORTH: "'πρεπε", NORM: "πρέπει"}], + "Δύσκολ'": [{ORTH: "Δύσκολ'", NORM: "δύσκολος"}], + "'θελα": [{ORTH: "'θελα", NORM: "θέλω"}], + "'γραφα": [{ORTH: "'γραφα", NORM: "γράφω"}], + "'παιρνα": [{ORTH: "'παιρνα", NORM: "παίρνω"}], + "'δειξε": [{ORTH: "'δειξε", NORM: "δείχνω"}], + "όμουρφ'": [{ORTH: "όμουρφ'", NORM: "όμορφος"}], + "κ'τσή": [{ORTH: "κ'τσή", NORM: "κουτσός"}], + "μηδ'": [{ORTH: "μηδ'", NORM: "μήδε"}], + "'ξομολογήθηκε": [{ORTH: "'ξομολογήθηκε", NORM: "εξομολογούμαι"}], + "'μας": [{ORTH: "'μας", NORM: "εμάς"}], + "'ξερες": [{ORTH: "'ξερες", NORM: "ξέρω"}], + "έφθασ'": [{ORTH: "έφθασ'", NORM: "φθάνω"}], + "εξ'": [{ORTH: "εξ'", NORM: "εκ"}], + "δώσ'": [{ORTH: "δώσ'", NORM: "δίνω"}], + "τίποτ'": [{ORTH: "τίποτ'", NORM: "τίποτα"}], + "Λήξ'": [{ORTH: "Λήξ'", NORM: "λήγω"}], + "άσ'": [{ORTH: "άσ'", NORM: "αφήνω"}], + "Στ'": [{ORTH: "Στ'", NORM: "στο"}], + "Δωσ'": [{ORTH: "Δωσ'", NORM: "δίνω"}], + "Βάψ'": [{ORTH: "Βάψ'", NORM: "βάφω"}], + "Αλλ'": [{ORTH: "Αλλ'", NORM: "αλλά"}], + "Αμ'": [{ORTH: "Αμ'", NORM: "άμα"}], + "Αγόρασ'": [{ORTH: "Αγόρασ'", NORM: "αγοράζω"}], + "'φύγε": [{ORTH: "'φύγε", NORM: "φεύγω"}], + "'φερε": [{ORTH: "'φερε", NORM: "φέρνω"}], + "'φαγε": [{ORTH: "'φαγε", NORM: "τρώω"}], + "'σπαγαν": [{ORTH: "'σπαγαν", NORM: "σπάω"}], + "'σκασε": [{ORTH: "'σκασε", NORM: "σκάω"}], + "'σβηνε": [{ORTH: "'σβηνε", NORM: "σβήνω"}], + "'ριξε": [{ORTH: "'ριξε", NORM: "ρίχνω"}], + "'κλεβε": [{ORTH: "'κλεβε", NORM: "κλέβω"}], + "'κει": [{ORTH: "'κει", NORM: "εκεί"}], + "'βλεπε": [{ORTH: "'βλεπε", NORM: "βλέπω"}], + "'βγαινε": [{ORTH: "'βγαινε", NORM: "βγαίνω"}], } _exc.update(_other_exc) @@ -134,35 +132,35 @@ for h in range(1, 12 + 1): for period in ["π.μ.", "πμ"]: _exc[f"{h}{period}"] = [ {ORTH: f"{h}"}, - {ORTH: period, LEMMA: "π.μ.", NORM: "π.μ."}, + {ORTH: period, NORM: "π.μ."}, ] for period in ["μ.μ.", "μμ"]: _exc[f"{h}{period}"] = [ {ORTH: f"{h}"}, - {ORTH: period, LEMMA: "μ.μ.", NORM: "μ.μ."}, + {ORTH: period, NORM: "μ.μ."}, ] for exc_data in [ - {ORTH: 
"ΑΓΡ.", LEMMA: "Αγροτικός", NORM: "Αγροτικός"}, - {ORTH: "Αγ. Γρ.", LEMMA: "Αγία Γραφή", NORM: "Αγία Γραφή"}, - {ORTH: "Αθ.", LEMMA: "Αθανάσιος", NORM: "Αθανάσιος"}, - {ORTH: "Αλεξ.", LEMMA: "Αλέξανδρος", NORM: "Αλέξανδρος"}, - {ORTH: "Απρ.", LEMMA: "Απρίλιος", NORM: "Απρίλιος"}, - {ORTH: "Αύγ.", LEMMA: "Αύγουστος", NORM: "Αύγουστος"}, - {ORTH: "Δεκ.", LEMMA: "Δεκέμβριος", NORM: "Δεκέμβριος"}, - {ORTH: "Δημ.", LEMMA: "Δήμος", NORM: "Δήμος"}, - {ORTH: "Ιαν.", LEMMA: "Ιανουάριος", NORM: "Ιανουάριος"}, - {ORTH: "Ιούλ.", LEMMA: "Ιούλιος", NORM: "Ιούλιος"}, - {ORTH: "Ιούν.", LEMMA: "Ιούνιος", NORM: "Ιούνιος"}, - {ORTH: "Ιωαν.", LEMMA: "Ιωάννης", NORM: "Ιωάννης"}, - {ORTH: "Μ. Ασία", LEMMA: "Μικρά Ασία", NORM: "Μικρά Ασία"}, - {ORTH: "Μάρτ.", LEMMA: "Μάρτιος", NORM: "Μάρτιος"}, - {ORTH: "Μάρτ'", LEMMA: "Μάρτιος", NORM: "Μάρτιος"}, - {ORTH: "Νοέμβρ.", LEMMA: "Νοέμβριος", NORM: "Νοέμβριος"}, - {ORTH: "Οκτ.", LEMMA: "Οκτώβριος", NORM: "Οκτώβριος"}, - {ORTH: "Σεπτ.", LEMMA: "Σεπτέμβριος", NORM: "Σεπτέμβριος"}, - {ORTH: "Φεβρ.", LEMMA: "Φεβρουάριος", NORM: "Φεβρουάριος"}, + {ORTH: "ΑΓΡ.", NORM: "Αγροτικός"}, + {ORTH: "Αγ. Γρ.", NORM: "Αγία Γραφή"}, + {ORTH: "Αθ.", NORM: "Αθανάσιος"}, + {ORTH: "Αλεξ.", NORM: "Αλέξανδρος"}, + {ORTH: "Απρ.", NORM: "Απρίλιος"}, + {ORTH: "Αύγ.", NORM: "Αύγουστος"}, + {ORTH: "Δεκ.", NORM: "Δεκέμβριος"}, + {ORTH: "Δημ.", NORM: "Δήμος"}, + {ORTH: "Ιαν.", NORM: "Ιανουάριος"}, + {ORTH: "Ιούλ.", NORM: "Ιούλιος"}, + {ORTH: "Ιούν.", NORM: "Ιούνιος"}, + {ORTH: "Ιωαν.", NORM: "Ιωάννης"}, + {ORTH: "Μ. 
Ασία", NORM: "Μικρά Ασία"}, + {ORTH: "Μάρτ.", NORM: "Μάρτιος"}, + {ORTH: "Μάρτ'", NORM: "Μάρτιος"}, + {ORTH: "Νοέμβρ.", NORM: "Νοέμβριος"}, + {ORTH: "Οκτ.", NORM: "Οκτώβριος"}, + {ORTH: "Σεπτ.", NORM: "Σεπτέμβριος"}, + {ORTH: "Φεβρ.", NORM: "Φεβρουάριος"}, ]: _exc[exc_data[ORTH]] = [exc_data] diff --git a/spacy/lang/en/tokenizer_exceptions.py b/spacy/lang/en/tokenizer_exceptions.py index 226678430..c210e1a19 100644 --- a/spacy/lang/en/tokenizer_exceptions.py +++ b/spacy/lang/en/tokenizer_exceptions.py @@ -1,5 +1,5 @@ from ..tokenizer_exceptions import BASE_EXCEPTIONS -from ...symbols import ORTH, LEMMA, TAG, NORM, PRON_LEMMA +from ...symbols import ORTH, NORM from ...util import update_exc @@ -28,110 +28,110 @@ _exclude = [ for pron in ["i"]: for orth in [pron, pron.title()]: _exc[orth + "'m"] = [ - {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"}, - {ORTH: "'m", LEMMA: "be", NORM: "am", TAG: "VBP"}, + {ORTH: orth, NORM: pron}, + {ORTH: "'m", NORM: "am"}, ] _exc[orth + "m"] = [ - {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"}, - {ORTH: "m", LEMMA: "be", TAG: "VBP", "tenspect": 1, "number": 1}, + {ORTH: orth, NORM: pron}, + {ORTH: "m", "tenspect": 1, "number": 1}, ] _exc[orth + "'ma"] = [ - {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"}, - {ORTH: "'m", LEMMA: "be", NORM: "am"}, - {ORTH: "a", LEMMA: "going to", NORM: "gonna"}, + {ORTH: orth, NORM: pron}, + {ORTH: "'m", NORM: "am"}, + {ORTH: "a", NORM: "gonna"}, ] _exc[orth + "ma"] = [ - {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"}, - {ORTH: "m", LEMMA: "be", NORM: "am"}, - {ORTH: "a", LEMMA: "going to", NORM: "gonna"}, + {ORTH: orth, NORM: pron}, + {ORTH: "m", NORM: "am"}, + {ORTH: "a", NORM: "gonna"}, ] for pron in ["i", "you", "he", "she", "it", "we", "they"]: for orth in [pron, pron.title()]: _exc[orth + "'ll"] = [ - {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"}, - {ORTH: "'ll", LEMMA: "will", NORM: "will", TAG: "MD"}, + {ORTH: orth, NORM: pron}, + {ORTH: "'ll", NORM: 
"will"}, ] _exc[orth + "ll"] = [ - {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"}, - {ORTH: "ll", LEMMA: "will", NORM: "will", TAG: "MD"}, + {ORTH: orth, NORM: pron}, + {ORTH: "ll", NORM: "will"}, ] _exc[orth + "'ll've"] = [ - {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"}, - {ORTH: "'ll", LEMMA: "will", NORM: "will", TAG: "MD"}, - {ORTH: "'ve", LEMMA: "have", NORM: "have", TAG: "VB"}, + {ORTH: orth, NORM: pron}, + {ORTH: "'ll", NORM: "will"}, + {ORTH: "'ve", NORM: "have"}, ] _exc[orth + "llve"] = [ - {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"}, - {ORTH: "ll", LEMMA: "will", NORM: "will", TAG: "MD"}, - {ORTH: "ve", LEMMA: "have", NORM: "have", TAG: "VB"}, + {ORTH: orth, NORM: pron}, + {ORTH: "ll", NORM: "will"}, + {ORTH: "ve", NORM: "have"}, ] _exc[orth + "'d"] = [ - {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"}, + {ORTH: orth, NORM: pron}, {ORTH: "'d", NORM: "'d"}, ] _exc[orth + "d"] = [ - {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"}, + {ORTH: orth, NORM: pron}, {ORTH: "d", NORM: "'d"}, ] _exc[orth + "'d've"] = [ - {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"}, - {ORTH: "'d", LEMMA: "would", NORM: "would", TAG: "MD"}, - {ORTH: "'ve", LEMMA: "have", NORM: "have", TAG: "VB"}, + {ORTH: orth, NORM: pron}, + {ORTH: "'d", NORM: "would"}, + {ORTH: "'ve", NORM: "have"}, ] _exc[orth + "dve"] = [ - {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"}, - {ORTH: "d", LEMMA: "would", NORM: "would", TAG: "MD"}, - {ORTH: "ve", LEMMA: "have", NORM: "have", TAG: "VB"}, + {ORTH: orth, NORM: pron}, + {ORTH: "d", NORM: "would"}, + {ORTH: "ve", NORM: "have"}, ] for pron in ["i", "you", "we", "they"]: for orth in [pron, pron.title()]: _exc[orth + "'ve"] = [ - {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"}, - {ORTH: "'ve", LEMMA: "have", NORM: "have", TAG: "VB"}, + {ORTH: orth, NORM: pron}, + {ORTH: "'ve", NORM: "have"}, ] _exc[orth + "ve"] = [ - {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"}, - {ORTH: 
"ve", LEMMA: "have", NORM: "have", TAG: "VB"}, + {ORTH: orth, NORM: pron}, + {ORTH: "ve", NORM: "have"}, ] for pron in ["you", "we", "they"]: for orth in [pron, pron.title()]: _exc[orth + "'re"] = [ - {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"}, - {ORTH: "'re", LEMMA: "be", NORM: "are"}, + {ORTH: orth, NORM: pron}, + {ORTH: "'re", NORM: "are"}, ] _exc[orth + "re"] = [ - {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"}, - {ORTH: "re", LEMMA: "be", NORM: "are", TAG: "VBZ"}, + {ORTH: orth, NORM: pron}, + {ORTH: "re", NORM: "are"}, ] for pron in ["he", "she", "it"]: for orth in [pron, pron.title()]: _exc[orth + "'s"] = [ - {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"}, + {ORTH: orth, NORM: pron}, {ORTH: "'s", NORM: "'s"}, ] _exc[orth + "s"] = [ - {ORTH: orth, LEMMA: PRON_LEMMA, NORM: pron, TAG: "PRP"}, + {ORTH: orth, NORM: pron}, {ORTH: "s"}, ] @@ -153,145 +153,145 @@ for word in [ ]: for orth in [word, word.title()]: _exc[orth + "'s"] = [ - {ORTH: orth, LEMMA: word, NORM: word}, + {ORTH: orth, NORM: word}, {ORTH: "'s", NORM: "'s"}, ] - _exc[orth + "s"] = [{ORTH: orth, LEMMA: word, NORM: word}, {ORTH: "s"}] + _exc[orth + "s"] = [{ORTH: orth, NORM: word}, {ORTH: "s"}] _exc[orth + "'ll"] = [ - {ORTH: orth, LEMMA: word, NORM: word}, - {ORTH: "'ll", LEMMA: "will", NORM: "will", TAG: "MD"}, + {ORTH: orth, NORM: word}, + {ORTH: "'ll", NORM: "will"}, ] _exc[orth + "ll"] = [ - {ORTH: orth, LEMMA: word, NORM: word}, - {ORTH: "ll", LEMMA: "will", NORM: "will", TAG: "MD"}, + {ORTH: orth, NORM: word}, + {ORTH: "ll", NORM: "will"}, ] _exc[orth + "'ll've"] = [ - {ORTH: orth, LEMMA: word, NORM: word}, - {ORTH: "'ll", LEMMA: "will", NORM: "will", TAG: "MD"}, - {ORTH: "'ve", LEMMA: "have", NORM: "have", TAG: "VB"}, + {ORTH: orth, NORM: word}, + {ORTH: "'ll", NORM: "will"}, + {ORTH: "'ve", NORM: "have"}, ] _exc[orth + "llve"] = [ - {ORTH: orth, LEMMA: word, NORM: word}, - {ORTH: "ll", LEMMA: "will", NORM: "will", TAG: "MD"}, - {ORTH: "ve", LEMMA: "have", 
NORM: "have", TAG: "VB"}, + {ORTH: orth, NORM: word}, + {ORTH: "ll", NORM: "will"}, + {ORTH: "ve", NORM: "have"}, ] _exc[orth + "'re"] = [ - {ORTH: orth, LEMMA: word, NORM: word}, - {ORTH: "'re", LEMMA: "be", NORM: "are"}, + {ORTH: orth, NORM: word}, + {ORTH: "'re", NORM: "are"}, ] _exc[orth + "re"] = [ - {ORTH: orth, LEMMA: word, NORM: word}, - {ORTH: "re", LEMMA: "be", NORM: "are"}, + {ORTH: orth, NORM: word}, + {ORTH: "re", NORM: "are"}, ] _exc[orth + "'ve"] = [ - {ORTH: orth, LEMMA: word, NORM: word}, - {ORTH: "'ve", LEMMA: "have", TAG: "VB"}, + {ORTH: orth, NORM: word}, + {ORTH: "'ve"}, ] _exc[orth + "ve"] = [ - {ORTH: orth, LEMMA: word}, - {ORTH: "ve", LEMMA: "have", NORM: "have", TAG: "VB"}, + {ORTH: orth}, + {ORTH: "ve", NORM: "have"}, ] _exc[orth + "'d"] = [ - {ORTH: orth, LEMMA: word, NORM: word}, + {ORTH: orth, NORM: word}, {ORTH: "'d", NORM: "'d"}, ] _exc[orth + "d"] = [ - {ORTH: orth, LEMMA: word, NORM: word}, + {ORTH: orth, NORM: word}, {ORTH: "d", NORM: "'d"}, ] _exc[orth + "'d've"] = [ - {ORTH: orth, LEMMA: word, NORM: word}, - {ORTH: "'d", LEMMA: "would", NORM: "would", TAG: "MD"}, - {ORTH: "'ve", LEMMA: "have", NORM: "have", TAG: "VB"}, + {ORTH: orth, NORM: word}, + {ORTH: "'d", NORM: "would"}, + {ORTH: "'ve", NORM: "have"}, ] _exc[orth + "dve"] = [ - {ORTH: orth, LEMMA: word, NORM: word}, - {ORTH: "d", LEMMA: "would", NORM: "would", TAG: "MD"}, - {ORTH: "ve", LEMMA: "have", NORM: "have", TAG: "VB"}, + {ORTH: orth, NORM: word}, + {ORTH: "d", NORM: "would"}, + {ORTH: "ve", NORM: "have"}, ] # Verbs for verb_data in [ - {ORTH: "ca", LEMMA: "can", NORM: "can", TAG: "MD"}, - {ORTH: "could", NORM: "could", TAG: "MD"}, - {ORTH: "do", LEMMA: "do", NORM: "do"}, - {ORTH: "does", LEMMA: "do", NORM: "does"}, - {ORTH: "did", LEMMA: "do", NORM: "do", TAG: "VBD"}, - {ORTH: "had", LEMMA: "have", NORM: "have", TAG: "VBD"}, - {ORTH: "may", NORM: "may", TAG: "MD"}, - {ORTH: "might", NORM: "might", TAG: "MD"}, - {ORTH: "must", NORM: "must", TAG: "MD"}, + {ORTH: "ca", 
NORM: "can"}, + {ORTH: "could", NORM: "could"}, + {ORTH: "do", NORM: "do"}, + {ORTH: "does", NORM: "does"}, + {ORTH: "did", NORM: "do"}, + {ORTH: "had", NORM: "have"}, + {ORTH: "may", NORM: "may"}, + {ORTH: "might", NORM: "might"}, + {ORTH: "must", NORM: "must"}, {ORTH: "need", NORM: "need"}, - {ORTH: "ought", NORM: "ought", TAG: "MD"}, - {ORTH: "sha", LEMMA: "shall", NORM: "shall", TAG: "MD"}, - {ORTH: "should", NORM: "should", TAG: "MD"}, - {ORTH: "wo", LEMMA: "will", NORM: "will", TAG: "MD"}, - {ORTH: "would", NORM: "would", TAG: "MD"}, + {ORTH: "ought", NORM: "ought"}, + {ORTH: "sha", NORM: "shall"}, + {ORTH: "should", NORM: "should"}, + {ORTH: "wo", NORM: "will"}, + {ORTH: "would", NORM: "would"}, ]: verb_data_tc = dict(verb_data) verb_data_tc[ORTH] = verb_data_tc[ORTH].title() for data in [verb_data, verb_data_tc]: _exc[data[ORTH] + "n't"] = [ dict(data), - {ORTH: "n't", LEMMA: "not", NORM: "not", TAG: "RB"}, + {ORTH: "n't", NORM: "not"}, ] _exc[data[ORTH] + "nt"] = [ dict(data), - {ORTH: "nt", LEMMA: "not", NORM: "not", TAG: "RB"}, + {ORTH: "nt", NORM: "not"}, ] _exc[data[ORTH] + "n't've"] = [ dict(data), - {ORTH: "n't", LEMMA: "not", NORM: "not", TAG: "RB"}, - {ORTH: "'ve", LEMMA: "have", NORM: "have", TAG: "VB"}, + {ORTH: "n't", NORM: "not"}, + {ORTH: "'ve", NORM: "have"}, ] _exc[data[ORTH] + "ntve"] = [ dict(data), - {ORTH: "nt", LEMMA: "not", NORM: "not", TAG: "RB"}, - {ORTH: "ve", LEMMA: "have", NORM: "have", TAG: "VB"}, + {ORTH: "nt", NORM: "not"}, + {ORTH: "ve", NORM: "have"}, ] for verb_data in [ - {ORTH: "could", NORM: "could", TAG: "MD"}, - {ORTH: "might", NORM: "might", TAG: "MD"}, - {ORTH: "must", NORM: "must", TAG: "MD"}, - {ORTH: "should", NORM: "should", TAG: "MD"}, - {ORTH: "would", NORM: "would", TAG: "MD"}, + {ORTH: "could", NORM: "could"}, + {ORTH: "might", NORM: "might"}, + {ORTH: "must", NORM: "must"}, + {ORTH: "should", NORM: "should"}, + {ORTH: "would", NORM: "would"}, ]: verb_data_tc = dict(verb_data) verb_data_tc[ORTH] = 
verb_data_tc[ORTH].title() for data in [verb_data, verb_data_tc]: - _exc[data[ORTH] + "'ve"] = [dict(data), {ORTH: "'ve", LEMMA: "have", TAG: "VB"}] + _exc[data[ORTH] + "'ve"] = [dict(data), {ORTH: "'ve"}] - _exc[data[ORTH] + "ve"] = [dict(data), {ORTH: "ve", LEMMA: "have", TAG: "VB"}] + _exc[data[ORTH] + "ve"] = [dict(data), {ORTH: "ve"}] for verb_data in [ - {ORTH: "ai", LEMMA: "be", TAG: "VBP", "number": 2}, - {ORTH: "are", LEMMA: "be", NORM: "are", TAG: "VBP", "number": 2}, - {ORTH: "is", LEMMA: "be", NORM: "is", TAG: "VBZ"}, - {ORTH: "was", LEMMA: "be", NORM: "was"}, - {ORTH: "were", LEMMA: "be", NORM: "were"}, + {ORTH: "ai", "number": 2}, + {ORTH: "are", NORM: "are", "number": 2}, + {ORTH: "is", NORM: "is"}, + {ORTH: "was", NORM: "was"}, + {ORTH: "were", NORM: "were"}, {ORTH: "have", NORM: "have"}, - {ORTH: "has", LEMMA: "have", NORM: "has"}, + {ORTH: "has", NORM: "has"}, {ORTH: "dare", NORM: "dare"}, ]: verb_data_tc = dict(verb_data) @@ -299,24 +299,24 @@ for verb_data in [ for data in [verb_data, verb_data_tc]: _exc[data[ORTH] + "n't"] = [ dict(data), - {ORTH: "n't", LEMMA: "not", NORM: "not", TAG: "RB"}, + {ORTH: "n't", NORM: "not"}, ] _exc[data[ORTH] + "nt"] = [ dict(data), - {ORTH: "nt", LEMMA: "not", NORM: "not", TAG: "RB"}, + {ORTH: "nt", NORM: "not"}, ] # Other contractions with trailing apostrophe for exc_data in [ - {ORTH: "doin", LEMMA: "do", NORM: "doing"}, - {ORTH: "goin", LEMMA: "go", NORM: "going"}, - {ORTH: "nothin", LEMMA: "nothing", NORM: "nothing"}, - {ORTH: "nuthin", LEMMA: "nothing", NORM: "nothing"}, - {ORTH: "ol", LEMMA: "old", NORM: "old"}, - {ORTH: "somethin", LEMMA: "something", NORM: "something"}, + {ORTH: "doin", NORM: "doing"}, + {ORTH: "goin", NORM: "going"}, + {ORTH: "nothin", NORM: "nothing"}, + {ORTH: "nuthin", NORM: "nothing"}, + {ORTH: "ol", NORM: "old"}, + {ORTH: "somethin", NORM: "something"}, ]: exc_data_tc = dict(exc_data) exc_data_tc[ORTH] = exc_data_tc[ORTH].title() @@ -331,9 +331,9 @@ for exc_data in [ for exc_data in 
[ {ORTH: "cause", NORM: "because"}, - {ORTH: "em", LEMMA: PRON_LEMMA, NORM: "them"}, - {ORTH: "ll", LEMMA: "will", NORM: "will"}, - {ORTH: "nuff", LEMMA: "enough", NORM: "enough"}, + {ORTH: "em", NORM: "them"}, + {ORTH: "ll", NORM: "will"}, + {ORTH: "nuff", NORM: "enough"}, ]: exc_data_apos = dict(exc_data) exc_data_apos[ORTH] = "'" + exc_data_apos[ORTH] @@ -347,166 +347,131 @@ for h in range(1, 12 + 1): for period in ["a.m.", "am"]: _exc[f"{h}{period}"] = [ {ORTH: f"{h}"}, - {ORTH: period, LEMMA: "a.m.", NORM: "a.m."}, + {ORTH: period, NORM: "a.m."}, ] for period in ["p.m.", "pm"]: _exc[f"{h}{period}"] = [ {ORTH: f"{h}"}, - {ORTH: period, LEMMA: "p.m.", NORM: "p.m."}, + {ORTH: period, NORM: "p.m."}, ] # Rest _other_exc = { - "y'all": [{ORTH: "y'", LEMMA: PRON_LEMMA, NORM: "you"}, {ORTH: "all"}], - "yall": [{ORTH: "y", LEMMA: PRON_LEMMA, NORM: "you"}, {ORTH: "all"}], - "how'd'y": [ - {ORTH: "how", LEMMA: "how"}, - {ORTH: "'d", LEMMA: "do"}, - {ORTH: "'y", LEMMA: PRON_LEMMA, NORM: "you"}, - ], - "How'd'y": [ - {ORTH: "How", LEMMA: "how", NORM: "how"}, - {ORTH: "'d", LEMMA: "do"}, - {ORTH: "'y", LEMMA: PRON_LEMMA, NORM: "you"}, - ], - "not've": [ - {ORTH: "not", LEMMA: "not", TAG: "RB"}, - {ORTH: "'ve", LEMMA: "have", NORM: "have", TAG: "VB"}, - ], - "notve": [ - {ORTH: "not", LEMMA: "not", TAG: "RB"}, - {ORTH: "ve", LEMMA: "have", NORM: "have", TAG: "VB"}, - ], - "Not've": [ - {ORTH: "Not", LEMMA: "not", NORM: "not", TAG: "RB"}, - {ORTH: "'ve", LEMMA: "have", NORM: "have", TAG: "VB"}, - ], - "Notve": [ - {ORTH: "Not", LEMMA: "not", NORM: "not", TAG: "RB"}, - {ORTH: "ve", LEMMA: "have", NORM: "have", TAG: "VB"}, - ], - "cannot": [ - {ORTH: "can", LEMMA: "can", TAG: "MD"}, - {ORTH: "not", LEMMA: "not", TAG: "RB"}, - ], - "Cannot": [ - {ORTH: "Can", LEMMA: "can", NORM: "can", TAG: "MD"}, - {ORTH: "not", LEMMA: "not", TAG: "RB"}, - ], - "gonna": [ - {ORTH: "gon", LEMMA: "go", NORM: "going"}, - {ORTH: "na", LEMMA: "to", NORM: "to"}, - ], - "Gonna": [ - {ORTH: "Gon", 
LEMMA: "go", NORM: "going"}, - {ORTH: "na", LEMMA: "to", NORM: "to"}, - ], - "gotta": [{ORTH: "got"}, {ORTH: "ta", LEMMA: "to", NORM: "to"}], - "Gotta": [{ORTH: "Got", NORM: "got"}, {ORTH: "ta", LEMMA: "to", NORM: "to"}], - "let's": [{ORTH: "let"}, {ORTH: "'s", LEMMA: PRON_LEMMA, NORM: "us"}], - "Let's": [ - {ORTH: "Let", LEMMA: "let", NORM: "let"}, - {ORTH: "'s", LEMMA: PRON_LEMMA, NORM: "us"}, - ], - "c'mon": [{ORTH: "c'm", NORM: "come", LEMMA: "come"}, {ORTH: "on"}], - "C'mon": [{ORTH: "C'm", NORM: "come", LEMMA: "come"}, {ORTH: "on"}], + "y'all": [{ORTH: "y'", NORM: "you"}, {ORTH: "all"}], + "yall": [{ORTH: "y", NORM: "you"}, {ORTH: "all"}], + "how'd'y": [{ORTH: "how"}, {ORTH: "'d"}, {ORTH: "'y", NORM: "you"}], + "How'd'y": [{ORTH: "How", NORM: "how"}, {ORTH: "'d"}, {ORTH: "'y", NORM: "you"}], + "not've": [{ORTH: "not"}, {ORTH: "'ve", NORM: "have"}], + "notve": [{ORTH: "not"}, {ORTH: "ve", NORM: "have"}], + "Not've": [{ORTH: "Not", NORM: "not"}, {ORTH: "'ve", NORM: "have"}], + "Notve": [{ORTH: "Not", NORM: "not"}, {ORTH: "ve", NORM: "have"}], + "cannot": [{ORTH: "can"}, {ORTH: "not"}], + "Cannot": [{ORTH: "Can", NORM: "can"}, {ORTH: "not"}], + "gonna": [{ORTH: "gon", NORM: "going"}, {ORTH: "na", NORM: "to"}], + "Gonna": [{ORTH: "Gon", NORM: "going"}, {ORTH: "na", NORM: "to"}], + "gotta": [{ORTH: "got"}, {ORTH: "ta", NORM: "to"}], + "Gotta": [{ORTH: "Got", NORM: "got"}, {ORTH: "ta", NORM: "to"}], + "let's": [{ORTH: "let"}, {ORTH: "'s", NORM: "us"}], + "Let's": [{ORTH: "Let", NORM: "let"}, {ORTH: "'s", NORM: "us"}], + "c'mon": [{ORTH: "c'm", NORM: "come"}, {ORTH: "on"}], + "C'mon": [{ORTH: "C'm", NORM: "come"}, {ORTH: "on"}], } _exc.update(_other_exc) for exc_data in [ - {ORTH: "'S", LEMMA: "'s", NORM: "'s"}, - {ORTH: "'s", LEMMA: "'s", NORM: "'s"}, - {ORTH: "\u2018S", LEMMA: "'s", NORM: "'s"}, - {ORTH: "\u2018s", LEMMA: "'s", NORM: "'s"}, - {ORTH: "and/or", LEMMA: "and/or", NORM: "and/or", TAG: "CC"}, - {ORTH: "w/o", LEMMA: "without", NORM: "without"}, - {ORTH: 
"'re", LEMMA: "be", NORM: "are"}, - {ORTH: "'Cause", LEMMA: "because", NORM: "because"}, - {ORTH: "'cause", LEMMA: "because", NORM: "because"}, - {ORTH: "'cos", LEMMA: "because", NORM: "because"}, - {ORTH: "'Cos", LEMMA: "because", NORM: "because"}, - {ORTH: "'coz", LEMMA: "because", NORM: "because"}, - {ORTH: "'Coz", LEMMA: "because", NORM: "because"}, - {ORTH: "'cuz", LEMMA: "because", NORM: "because"}, - {ORTH: "'Cuz", LEMMA: "because", NORM: "because"}, - {ORTH: "'bout", LEMMA: "about", NORM: "about"}, - {ORTH: "ma'am", LEMMA: "madam", NORM: "madam"}, - {ORTH: "Ma'am", LEMMA: "madam", NORM: "madam"}, - {ORTH: "o'clock", LEMMA: "o'clock", NORM: "o'clock"}, - {ORTH: "O'clock", LEMMA: "o'clock", NORM: "o'clock"}, - {ORTH: "lovin'", LEMMA: "love", NORM: "loving"}, - {ORTH: "Lovin'", LEMMA: "love", NORM: "loving"}, - {ORTH: "lovin", LEMMA: "love", NORM: "loving"}, - {ORTH: "Lovin", LEMMA: "love", NORM: "loving"}, - {ORTH: "havin'", LEMMA: "have", NORM: "having"}, - {ORTH: "Havin'", LEMMA: "have", NORM: "having"}, - {ORTH: "havin", LEMMA: "have", NORM: "having"}, - {ORTH: "Havin", LEMMA: "have", NORM: "having"}, - {ORTH: "doin'", LEMMA: "do", NORM: "doing"}, - {ORTH: "Doin'", LEMMA: "do", NORM: "doing"}, - {ORTH: "doin", LEMMA: "do", NORM: "doing"}, - {ORTH: "Doin", LEMMA: "do", NORM: "doing"}, - {ORTH: "goin'", LEMMA: "go", NORM: "going"}, - {ORTH: "Goin'", LEMMA: "go", NORM: "going"}, - {ORTH: "goin", LEMMA: "go", NORM: "going"}, - {ORTH: "Goin", LEMMA: "go", NORM: "going"}, - {ORTH: "Mt.", LEMMA: "Mount", NORM: "Mount"}, - {ORTH: "Ak.", LEMMA: "Alaska", NORM: "Alaska"}, - {ORTH: "Ala.", LEMMA: "Alabama", NORM: "Alabama"}, - {ORTH: "Apr.", LEMMA: "April", NORM: "April"}, - {ORTH: "Ariz.", LEMMA: "Arizona", NORM: "Arizona"}, - {ORTH: "Ark.", LEMMA: "Arkansas", NORM: "Arkansas"}, - {ORTH: "Aug.", LEMMA: "August", NORM: "August"}, - {ORTH: "Calif.", LEMMA: "California", NORM: "California"}, - {ORTH: "Colo.", LEMMA: "Colorado", NORM: "Colorado"}, - {ORTH: "Conn.", 
LEMMA: "Connecticut", NORM: "Connecticut"}, - {ORTH: "Dec.", LEMMA: "December", NORM: "December"}, - {ORTH: "Del.", LEMMA: "Delaware", NORM: "Delaware"}, - {ORTH: "Feb.", LEMMA: "February", NORM: "February"}, - {ORTH: "Fla.", LEMMA: "Florida", NORM: "Florida"}, - {ORTH: "Ga.", LEMMA: "Georgia", NORM: "Georgia"}, - {ORTH: "Ia.", LEMMA: "Iowa", NORM: "Iowa"}, - {ORTH: "Id.", LEMMA: "Idaho", NORM: "Idaho"}, - {ORTH: "Ill.", LEMMA: "Illinois", NORM: "Illinois"}, - {ORTH: "Ind.", LEMMA: "Indiana", NORM: "Indiana"}, - {ORTH: "Jan.", LEMMA: "January", NORM: "January"}, - {ORTH: "Jul.", LEMMA: "July", NORM: "July"}, - {ORTH: "Jun.", LEMMA: "June", NORM: "June"}, - {ORTH: "Kan.", LEMMA: "Kansas", NORM: "Kansas"}, - {ORTH: "Kans.", LEMMA: "Kansas", NORM: "Kansas"}, - {ORTH: "Ky.", LEMMA: "Kentucky", NORM: "Kentucky"}, - {ORTH: "La.", LEMMA: "Louisiana", NORM: "Louisiana"}, - {ORTH: "Mar.", LEMMA: "March", NORM: "March"}, - {ORTH: "Mass.", LEMMA: "Massachusetts", NORM: "Massachusetts"}, - {ORTH: "May.", LEMMA: "May", NORM: "May"}, - {ORTH: "Mich.", LEMMA: "Michigan", NORM: "Michigan"}, - {ORTH: "Minn.", LEMMA: "Minnesota", NORM: "Minnesota"}, - {ORTH: "Miss.", LEMMA: "Mississippi", NORM: "Mississippi"}, - {ORTH: "N.C.", LEMMA: "North Carolina", NORM: "North Carolina"}, - {ORTH: "N.D.", LEMMA: "North Dakota", NORM: "North Dakota"}, - {ORTH: "N.H.", LEMMA: "New Hampshire", NORM: "New Hampshire"}, - {ORTH: "N.J.", LEMMA: "New Jersey", NORM: "New Jersey"}, - {ORTH: "N.M.", LEMMA: "New Mexico", NORM: "New Mexico"}, - {ORTH: "N.Y.", LEMMA: "New York", NORM: "New York"}, - {ORTH: "Neb.", LEMMA: "Nebraska", NORM: "Nebraska"}, - {ORTH: "Nebr.", LEMMA: "Nebraska", NORM: "Nebraska"}, - {ORTH: "Nev.", LEMMA: "Nevada", NORM: "Nevada"}, - {ORTH: "Nov.", LEMMA: "November", NORM: "November"}, - {ORTH: "Oct.", LEMMA: "October", NORM: "October"}, - {ORTH: "Okla.", LEMMA: "Oklahoma", NORM: "Oklahoma"}, - {ORTH: "Ore.", LEMMA: "Oregon", NORM: "Oregon"}, - {ORTH: "Pa.", LEMMA: "Pennsylvania", 
NORM: "Pennsylvania"}, - {ORTH: "S.C.", LEMMA: "South Carolina", NORM: "South Carolina"}, - {ORTH: "Sep.", LEMMA: "September", NORM: "September"}, - {ORTH: "Sept.", LEMMA: "September", NORM: "September"}, - {ORTH: "Tenn.", LEMMA: "Tennessee", NORM: "Tennessee"}, - {ORTH: "Va.", LEMMA: "Virginia", NORM: "Virginia"}, - {ORTH: "Wash.", LEMMA: "Washington", NORM: "Washington"}, - {ORTH: "Wis.", LEMMA: "Wisconsin", NORM: "Wisconsin"}, + {ORTH: "'S", NORM: "'s"}, + {ORTH: "'s", NORM: "'s"}, + {ORTH: "\u2018S", NORM: "'s"}, + {ORTH: "\u2018s", NORM: "'s"}, + {ORTH: "and/or", NORM: "and/or"}, + {ORTH: "w/o", NORM: "without"}, + {ORTH: "'re", NORM: "are"}, + {ORTH: "'Cause", NORM: "because"}, + {ORTH: "'cause", NORM: "because"}, + {ORTH: "'cos", NORM: "because"}, + {ORTH: "'Cos", NORM: "because"}, + {ORTH: "'coz", NORM: "because"}, + {ORTH: "'Coz", NORM: "because"}, + {ORTH: "'cuz", NORM: "because"}, + {ORTH: "'Cuz", NORM: "because"}, + {ORTH: "'bout", NORM: "about"}, + {ORTH: "ma'am", NORM: "madam"}, + {ORTH: "Ma'am", NORM: "madam"}, + {ORTH: "o'clock", NORM: "o'clock"}, + {ORTH: "O'clock", NORM: "o'clock"}, + {ORTH: "lovin'", NORM: "loving"}, + {ORTH: "Lovin'", NORM: "loving"}, + {ORTH: "lovin", NORM: "loving"}, + {ORTH: "Lovin", NORM: "loving"}, + {ORTH: "havin'", NORM: "having"}, + {ORTH: "Havin'", NORM: "having"}, + {ORTH: "havin", NORM: "having"}, + {ORTH: "Havin", NORM: "having"}, + {ORTH: "doin'", NORM: "doing"}, + {ORTH: "Doin'", NORM: "doing"}, + {ORTH: "doin", NORM: "doing"}, + {ORTH: "Doin", NORM: "doing"}, + {ORTH: "goin'", NORM: "going"}, + {ORTH: "Goin'", NORM: "going"}, + {ORTH: "goin", NORM: "going"}, + {ORTH: "Goin", NORM: "going"}, + {ORTH: "Mt.", NORM: "Mount"}, + {ORTH: "Ak.", NORM: "Alaska"}, + {ORTH: "Ala.", NORM: "Alabama"}, + {ORTH: "Apr.", NORM: "April"}, + {ORTH: "Ariz.", NORM: "Arizona"}, + {ORTH: "Ark.", NORM: "Arkansas"}, + {ORTH: "Aug.", NORM: "August"}, + {ORTH: "Calif.", NORM: "California"}, + {ORTH: "Colo.", NORM: "Colorado"}, + {ORTH: 
"Conn.", NORM: "Connecticut"}, + {ORTH: "Dec.", NORM: "December"}, + {ORTH: "Del.", NORM: "Delaware"}, + {ORTH: "Feb.", NORM: "February"}, + {ORTH: "Fla.", NORM: "Florida"}, + {ORTH: "Ga.", NORM: "Georgia"}, + {ORTH: "Ia.", NORM: "Iowa"}, + {ORTH: "Id.", NORM: "Idaho"}, + {ORTH: "Ill.", NORM: "Illinois"}, + {ORTH: "Ind.", NORM: "Indiana"}, + {ORTH: "Jan.", NORM: "January"}, + {ORTH: "Jul.", NORM: "July"}, + {ORTH: "Jun.", NORM: "June"}, + {ORTH: "Kan.", NORM: "Kansas"}, + {ORTH: "Kans.", NORM: "Kansas"}, + {ORTH: "Ky.", NORM: "Kentucky"}, + {ORTH: "La.", NORM: "Louisiana"}, + {ORTH: "Mar.", NORM: "March"}, + {ORTH: "Mass.", NORM: "Massachusetts"}, + {ORTH: "May.", NORM: "May"}, + {ORTH: "Mich.", NORM: "Michigan"}, + {ORTH: "Minn.", NORM: "Minnesota"}, + {ORTH: "Miss.", NORM: "Mississippi"}, + {ORTH: "N.C.", NORM: "North Carolina"}, + {ORTH: "N.D.", NORM: "North Dakota"}, + {ORTH: "N.H.", NORM: "New Hampshire"}, + {ORTH: "N.J.", NORM: "New Jersey"}, + {ORTH: "N.M.", NORM: "New Mexico"}, + {ORTH: "N.Y.", NORM: "New York"}, + {ORTH: "Neb.", NORM: "Nebraska"}, + {ORTH: "Nebr.", NORM: "Nebraska"}, + {ORTH: "Nev.", NORM: "Nevada"}, + {ORTH: "Nov.", NORM: "November"}, + {ORTH: "Oct.", NORM: "October"}, + {ORTH: "Okla.", NORM: "Oklahoma"}, + {ORTH: "Ore.", NORM: "Oregon"}, + {ORTH: "Pa.", NORM: "Pennsylvania"}, + {ORTH: "S.C.", NORM: "South Carolina"}, + {ORTH: "Sep.", NORM: "September"}, + {ORTH: "Sept.", NORM: "September"}, + {ORTH: "Tenn.", NORM: "Tennessee"}, + {ORTH: "Va.", NORM: "Virginia"}, + {ORTH: "Wash.", NORM: "Washington"}, + {ORTH: "Wis.", NORM: "Wisconsin"}, ]: _exc[exc_data[ORTH]] = [exc_data] diff --git a/spacy/lang/es/tokenizer_exceptions.py b/spacy/lang/es/tokenizer_exceptions.py index 63124578e..fbfe75545 100644 --- a/spacy/lang/es/tokenizer_exceptions.py +++ b/spacy/lang/es/tokenizer_exceptions.py @@ -1,27 +1,27 @@ from ..tokenizer_exceptions import BASE_EXCEPTIONS -from ...symbols import ORTH, LEMMA, NORM, PRON_LEMMA +from ...symbols import ORTH, NORM 
from ...util import update_exc _exc = { - "pal": [{ORTH: "pa", LEMMA: "para"}, {ORTH: "l", LEMMA: "el", NORM: "el"}], + "pal": [{ORTH: "pa"}, {ORTH: "l", NORM: "el"}], } for exc_data in [ - {ORTH: "n°", LEMMA: "número"}, - {ORTH: "°C", LEMMA: "grados Celcius"}, - {ORTH: "aprox.", LEMMA: "aproximadamente"}, - {ORTH: "dna.", LEMMA: "docena"}, - {ORTH: "dpto.", LEMMA: "departamento"}, - {ORTH: "ej.", LEMMA: "ejemplo"}, - {ORTH: "esq.", LEMMA: "esquina"}, - {ORTH: "pág.", LEMMA: "página"}, - {ORTH: "p.ej.", LEMMA: "por ejemplo"}, - {ORTH: "Ud.", LEMMA: PRON_LEMMA, NORM: "usted"}, - {ORTH: "Vd.", LEMMA: PRON_LEMMA, NORM: "usted"}, - {ORTH: "Uds.", LEMMA: PRON_LEMMA, NORM: "ustedes"}, - {ORTH: "Vds.", LEMMA: PRON_LEMMA, NORM: "ustedes"}, + {ORTH: "n°"}, + {ORTH: "°C"}, + {ORTH: "aprox."}, + {ORTH: "dna."}, + {ORTH: "dpto."}, + {ORTH: "ej."}, + {ORTH: "esq."}, + {ORTH: "pág."}, + {ORTH: "p.ej."}, + {ORTH: "Ud.", NORM: "usted"}, + {ORTH: "Vd.", NORM: "usted"}, + {ORTH: "Uds.", NORM: "ustedes"}, + {ORTH: "Vds.", NORM: "ustedes"}, {ORTH: "vol.", NORM: "volúmen"}, ]: _exc[exc_data[ORTH]] = [exc_data] @@ -29,14 +29,14 @@ for exc_data in [ # Times -_exc["12m."] = [{ORTH: "12"}, {ORTH: "m.", LEMMA: "p.m."}] +_exc["12m."] = [{ORTH: "12"}, {ORTH: "m."}] for h in range(1, 12 + 1): for period in ["a.m.", "am"]: - _exc[f"{h}{period}"] = [{ORTH: f"{h}"}, {ORTH: period, LEMMA: "a.m."}] + _exc[f"{h}{period}"] = [{ORTH: f"{h}"}, {ORTH: period}] for period in ["p.m.", "pm"]: - _exc[f"{h}{period}"] = [{ORTH: f"{h}"}, {ORTH: period, LEMMA: "p.m."}] + _exc[f"{h}{period}"] = [{ORTH: f"{h}"}, {ORTH: period}] for orth in [ diff --git a/spacy/lang/fa/tokenizer_exceptions.py b/spacy/lang/fa/tokenizer_exceptions.py index db9e3f6fc..30df798ab 100644 --- a/spacy/lang/fa/tokenizer_exceptions.py +++ b/spacy/lang/fa/tokenizer_exceptions.py @@ -1,2753 +1,747 @@ -from ...symbols import ORTH, LEMMA, TAG, NORM +from ...symbols import ORTH, NORM -_exc = { - ".ق ": [{LEMMA: "قمری", ORTH: ".ق "}], - ".م": 
[{LEMMA: "میلادی", ORTH: ".م"}], - ".هـ": [{LEMMA: "هجری", ORTH: ".هـ"}], - "ب.م": [{LEMMA: "بعد از میلاد", ORTH: "ب.م"}], - "ق.م": [{LEMMA: "قبل از میلاد", ORTH: "ق.م"}], +TOKENIZER_EXCEPTIONS = { + ".ق ": [{ORTH: ".ق "}], + ".م": [{ORTH: ".م"}], + ".هـ": [{ORTH: ".هـ"}], + "ب.م": [{ORTH: "ب.م"}], + "ق.م": [{ORTH: "ق.م"}], + "آبرویت": [{ORTH: "آبروی", NORM: "آبروی"}, {ORTH: "ت", NORM: "ت"}], + "آب‌نباتش": [{ORTH: "آب‌نبات", NORM: "آب‌نبات"}, {ORTH: "ش", NORM: "ش"}], + "آثارش": [{ORTH: "آثار", NORM: "آثار"}, {ORTH: "ش", NORM: "ش"}], + "آخرش": [{ORTH: "آخر", NORM: "آخر"}, {ORTH: "ش", NORM: "ش"}], + "آدمهاست": [{ORTH: "آدمها", NORM: "آدمها"}, {ORTH: "ست", NORM: "ست"}], + "آرزومندیم": [{ORTH: "آرزومند", NORM: "آرزومند"}, {ORTH: "یم", NORM: "یم"}], + "آزادند": [{ORTH: "آزاد", NORM: "آزاد"}, {ORTH: "ند", NORM: "ند"}], + "آسیب‌پذیرند": [{ORTH: "آسیب‌پذیر", NORM: "آسیب‌پذیر"}, {ORTH: "ند", NORM: "ند"}], + "آفریده‌اند": [{ORTH: "آفریده‌", NORM: "آفریده‌"}, {ORTH: "اند", NORM: "اند"}], + "آمدنش": [{ORTH: "آمدن", NORM: "آمدن"}, {ORTH: "ش", NORM: "ش"}], + "آمریکاست": [{ORTH: "آمریکا", NORM: "آمریکا"}, {ORTH: "ست", NORM: "ست"}], + "آنجاست": [{ORTH: "آنجا", NORM: "آنجا"}, {ORTH: "ست", NORM: "ست"}], + "آنست": [{ORTH: "آن", NORM: "آن"}, {ORTH: "ست", NORM: "ست"}], + "آنند": [{ORTH: "آن", NORM: "آن"}, {ORTH: "ند", NORM: "ند"}], + "آن‌هاست": [{ORTH: "آن‌ها", NORM: "آن‌ها"}, {ORTH: "ست", NORM: "ست"}], + "آپاداناست": [{ORTH: "آپادانا", NORM: "آپادانا"}, {ORTH: "ست", NORM: "ست"}], + "اجتماعی‌مان": [{ORTH: "اجتماعی‌", NORM: "اجتماعی‌"}, {ORTH: "مان", NORM: "مان"}], + "اجدادت": [{ORTH: "اجداد", NORM: "اجداد"}, {ORTH: "ت", NORM: "ت"}], + "اجدادش": [{ORTH: "اجداد", NORM: "اجداد"}, {ORTH: "ش", NORM: "ش"}], + "اجدادی‌شان": [{ORTH: "اجدادی‌", NORM: "اجدادی‌"}, {ORTH: "شان", NORM: "شان"}], + "اجراست": [{ORTH: "اجرا", NORM: "اجرا"}, {ORTH: "ست", NORM: "ست"}], + "اختیارش": [{ORTH: "اختیار", NORM: "اختیار"}, {ORTH: "ش", NORM: "ش"}], + "اخلاقشان": [{ORTH: "اخلاق", NORM: "اخلاق"}, {ORTH: "شان", 
NORM: "شان"}], + "ادعایمان": [{ORTH: "ادعای", NORM: "ادعای"}, {ORTH: "مان", NORM: "مان"}], + "اذیتش": [{ORTH: "اذیت", NORM: "اذیت"}, {ORTH: "ش", NORM: "ش"}], + "اراده‌اش": [{ORTH: "اراده‌", NORM: "اراده‌"}, {ORTH: "اش", NORM: "اش"}], + "ارتباطش": [{ORTH: "ارتباط", NORM: "ارتباط"}, {ORTH: "ش", NORM: "ش"}], + "ارتباطمان": [{ORTH: "ارتباط", NORM: "ارتباط"}, {ORTH: "مان", NORM: "مان"}], + "ارزشهاست": [{ORTH: "ارزشها", NORM: "ارزشها"}, {ORTH: "ست", NORM: "ست"}], + "ارزی‌اش": [{ORTH: "ارزی‌", NORM: "ارزی‌"}, {ORTH: "اش", NORM: "اش"}], + "اره‌اش": [{ORTH: "اره‌", NORM: "اره‌"}, {ORTH: "اش", NORM: "اش"}], + "ازش": [{ORTH: "از", NORM: "از"}, {ORTH: "ش", NORM: "ش"}], + "ازین": [{ORTH: "از", NORM: "از"}, {ORTH: "ین", NORM: "ین"}], + "ازین‌هاست": [ + {ORTH: "از", NORM: "از"}, + {ORTH: "ین‌ها", NORM: "ین‌ها"}, + {ORTH: "ست", NORM: "ست"}, + ], + "استخوانند": [{ORTH: "استخوان", NORM: "استخوان"}, {ORTH: "ند", NORM: "ند"}], + "اسلامند": [{ORTH: "اسلام", NORM: "اسلام"}, {ORTH: "ند", NORM: "ند"}], + "اسلامی‌اند": [{ORTH: "اسلامی‌", NORM: "اسلامی‌"}, {ORTH: "اند", NORM: "اند"}], + "اسلحه‌هایشان": [ + {ORTH: "اسلحه‌های", NORM: "اسلحه‌های"}, + {ORTH: "شان", NORM: "شان"}, + ], + "اسمت": [{ORTH: "اسم", NORM: "اسم"}, {ORTH: "ت", NORM: "ت"}], + "اسمش": [{ORTH: "اسم", NORM: "اسم"}, {ORTH: "ش", NORM: "ش"}], + "اشتباهند": [{ORTH: "اشتباه", NORM: "اشتباه"}, {ORTH: "ند", NORM: "ند"}], + "اصلش": [{ORTH: "اصل", NORM: "اصل"}, {ORTH: "ش", NORM: "ش"}], + "اطاقش": [{ORTH: "اطاق", NORM: "اطاق"}, {ORTH: "ش", NORM: "ش"}], + "اعتقادند": [{ORTH: "اعتقاد", NORM: "اعتقاد"}, {ORTH: "ند", NORM: "ند"}], + "اعلایش": [{ORTH: "اعلای", NORM: "اعلای"}, {ORTH: "ش", NORM: "ش"}], + "افتراست": [{ORTH: "افترا", NORM: "افترا"}, {ORTH: "ست", NORM: "ست"}], + "افطارت": [{ORTH: "افطار", NORM: "افطار"}, {ORTH: "ت", NORM: "ت"}], + "اقوامش": [{ORTH: "اقوام", NORM: "اقوام"}, {ORTH: "ش", NORM: "ش"}], + "امروزیش": [{ORTH: "امروزی", NORM: "امروزی"}, {ORTH: "ش", NORM: "ش"}], + "اموالش": [{ORTH: "اموال", NORM: "اموال"}, {ORTH: "ش", 
NORM: "ش"}], + "امیدوارند": [{ORTH: "امیدوار", NORM: "امیدوار"}, {ORTH: "ند", NORM: "ند"}], + "امیدواریم": [{ORTH: "امیدوار", NORM: "امیدوار"}, {ORTH: "یم", NORM: "یم"}], + "انتخابهایم": [{ORTH: "انتخابها", NORM: "انتخابها"}, {ORTH: "یم", NORM: "یم"}], + "انتظارم": [{ORTH: "انتظار", NORM: "انتظار"}, {ORTH: "م", NORM: "م"}], + "انجمنم": [{ORTH: "انجمن", NORM: "انجمن"}, {ORTH: "م", NORM: "م"}], + "اندرش": [{ORTH: "اندر", NORM: "اندر"}, {ORTH: "ش", NORM: "ش"}], + "انشایش": [{ORTH: "انشای", NORM: "انشای"}, {ORTH: "ش", NORM: "ش"}], + "انگشتشان": [{ORTH: "انگشت", NORM: "انگشت"}, {ORTH: "شان", NORM: "شان"}], + "انگشتهایش": [{ORTH: "انگشتهای", NORM: "انگشتهای"}, {ORTH: "ش", NORM: "ش"}], + "اهمیتشان": [{ORTH: "اهمیت", NORM: "اهمیت"}, {ORTH: "شان", NORM: "شان"}], + "اهمیتند": [{ORTH: "اهمیت", NORM: "اهمیت"}, {ORTH: "ند", NORM: "ند"}], + "اوایلش": [{ORTH: "اوایل", NORM: "اوایل"}, {ORTH: "ش", NORM: "ش"}], + "اوست": [{ORTH: "او", NORM: "او"}, {ORTH: "ست", NORM: "ست"}], + "اولش": [{ORTH: "اول", NORM: "اول"}, {ORTH: "ش", NORM: "ش"}], + "اولشان": [{ORTH: "اول", NORM: "اول"}, {ORTH: "شان", NORM: "شان"}], + "اولم": [{ORTH: "اول", NORM: "اول"}, {ORTH: "م", NORM: "م"}], + "اکثرشان": [{ORTH: "اکثر", NORM: "اکثر"}, {ORTH: "شان", NORM: "شان"}], + "ایتالیاست": [{ORTH: "ایتالیا", NORM: "ایتالیا"}, {ORTH: "ست", NORM: "ست"}], + "ایرانی‌اش": [{ORTH: "ایرانی‌", NORM: "ایرانی‌"}, {ORTH: "اش", NORM: "اش"}], + "اینجاست": [{ORTH: "اینجا", NORM: "اینجا"}, {ORTH: "ست", NORM: "ست"}], + "این‌هاست": [{ORTH: "این‌ها", NORM: "این‌ها"}, {ORTH: "ست", NORM: "ست"}], + "بابات": [{ORTH: "بابا", NORM: "بابا"}, {ORTH: "ت", NORM: "ت"}], + "بارش": [{ORTH: "بار", NORM: "بار"}, {ORTH: "ش", NORM: "ش"}], + "بازیگرانش": [{ORTH: "بازیگران", NORM: "بازیگران"}, {ORTH: "ش", NORM: "ش"}], + "بازیگرمان": [{ORTH: "بازیگر", NORM: "بازیگر"}, {ORTH: "مان", NORM: "مان"}], + "بازیگرهایم": [{ORTH: "بازیگرها", NORM: "بازیگرها"}, {ORTH: "یم", NORM: "یم"}], + "بازی‌اش": [{ORTH: "بازی‌", NORM: "بازی‌"}, {ORTH: "اش", NORM: "اش"}], + 
"بالاست": [{ORTH: "بالا", NORM: "بالا"}, {ORTH: "ست", NORM: "ست"}], + "باورند": [{ORTH: "باور", NORM: "باور"}, {ORTH: "ند", NORM: "ند"}], + "بجاست": [{ORTH: "بجا", NORM: "بجا"}, {ORTH: "ست", NORM: "ست"}], + "بدان": [{ORTH: "ب", NORM: "ب"}, {ORTH: "دان", NORM: "دان"}], + "بدش": [{ORTH: "بد", NORM: "بد"}, {ORTH: "ش", NORM: "ش"}], + "بدشان": [{ORTH: "بد", NORM: "بد"}, {ORTH: "شان", NORM: "شان"}], + "بدنم": [{ORTH: "بدن", NORM: "بدن"}, {ORTH: "م", NORM: "م"}], + "بدهی‌ات": [{ORTH: "بدهی‌", NORM: "بدهی‌"}, {ORTH: "ات", NORM: "ات"}], + "بدین": [{ORTH: "ب", NORM: "ب"}, {ORTH: "دین", NORM: "دین"}], + "برابرش": [{ORTH: "برابر", NORM: "برابر"}, {ORTH: "ش", NORM: "ش"}], + "برادرت": [{ORTH: "برادر", NORM: "برادر"}, {ORTH: "ت", NORM: "ت"}], + "برادرش": [{ORTH: "برادر", NORM: "برادر"}, {ORTH: "ش", NORM: "ش"}], + "برایت": [{ORTH: "برای", NORM: "برای"}, {ORTH: "ت", NORM: "ت"}], + "برایتان": [{ORTH: "برای", NORM: "برای"}, {ORTH: "تان", NORM: "تان"}], + "برایش": [{ORTH: "برای", NORM: "برای"}, {ORTH: "ش", NORM: "ش"}], + "برایشان": [{ORTH: "برای", NORM: "برای"}, {ORTH: "شان", NORM: "شان"}], + "برایم": [{ORTH: "برای", NORM: "برای"}, {ORTH: "م", NORM: "م"}], + "برایمان": [{ORTH: "برای", NORM: "برای"}, {ORTH: "مان", NORM: "مان"}], + "برخوردارند": [{ORTH: "برخوردار", NORM: "برخوردار"}, {ORTH: "ند", NORM: "ند"}], + "برنامه‌سازهاست": [ + {ORTH: "برنامه‌سازها", NORM: "برنامه‌سازها"}, + {ORTH: "ست", NORM: "ست"}, + ], + "برهمش": [{ORTH: "برهم", NORM: "برهم"}, {ORTH: "ش", NORM: "ش"}], + "برهنه‌اش": [{ORTH: "برهنه‌", NORM: "برهنه‌"}, {ORTH: "اش", NORM: "اش"}], + "برگهایش": [{ORTH: "برگها", NORM: "برگها"}, {ORTH: "یش", NORM: "یش"}], + "برین": [{ORTH: "بر", NORM: "بر"}, {ORTH: "ین", NORM: "ین"}], + "بزرگش": [{ORTH: "بزرگ", NORM: "بزرگ"}, {ORTH: "ش", NORM: "ش"}], + "بزرگ‌تری": [{ORTH: "بزرگ‌تر", NORM: "بزرگ‌تر"}, {ORTH: "ی", NORM: "ی"}], + "بساطش": [{ORTH: "بساط", NORM: "بساط"}, {ORTH: "ش", NORM: "ش"}], + "بعدش": [{ORTH: "بعد", NORM: "بعد"}, {ORTH: "ش", NORM: "ش"}], + "بعضیهایشان": [{ORTH: 
"بعضیهای", NORM: "بعضیهای"}, {ORTH: "شان", NORM: "شان"}], + "بعضی‌شان": [{ORTH: "بعضی", NORM: "بعضی"}, {ORTH: "‌شان", NORM: "شان"}], + "بقیه‌اش": [{ORTH: "بقیه‌", NORM: "بقیه‌"}, {ORTH: "اش", NORM: "اش"}], + "بلندش": [{ORTH: "بلند", NORM: "بلند"}, {ORTH: "ش", NORM: "ش"}], + "بناگوشش": [{ORTH: "بناگوش", NORM: "بناگوش"}, {ORTH: "ش", NORM: "ش"}], + "بنظرم": [ + {ORTH: "ب", NORM: "ب"}, + {ORTH: "نظر", NORM: "نظر"}, + {ORTH: "م", NORM: "م"}, + ], + "بهت": [{ORTH: "به", NORM: "به"}, {ORTH: "ت", NORM: "ت"}], + "بهترش": [{ORTH: "بهتر", NORM: "بهتر"}, {ORTH: "ش", NORM: "ش"}], + "بهترم": [{ORTH: "بهتر", NORM: "بهتر"}, {ORTH: "م", NORM: "م"}], + "بهتری": [{ORTH: "بهتر", NORM: "بهتر"}, {ORTH: "ی", NORM: "ی"}], + "بهش": [{ORTH: "به", NORM: "به"}, {ORTH: "ش", NORM: "ش"}], + "به‌شان": [{ORTH: "به‌", NORM: "به‌"}, {ORTH: "شان", NORM: "شان"}], + "بودمش": [{ORTH: "بودم", NORM: "بودم"}, {ORTH: "ش", NORM: "ش"}], + "بودنش": [{ORTH: "بودن", NORM: "بودن"}, {ORTH: "ش", NORM: "ش"}], + "بودن‌شان": [{ORTH: "بودن‌", NORM: "بودن‌"}, {ORTH: "شان", NORM: "شان"}], + "بوستانش": [{ORTH: "بوستان", NORM: "بوستان"}, {ORTH: "ش", NORM: "ش"}], + "بویش": [{ORTH: "بو", NORM: "بو"}, {ORTH: "یش", NORM: "یش"}], + "بچه‌اش": [{ORTH: "بچه‌", NORM: "بچه‌"}, {ORTH: "اش", NORM: "اش"}], + "بچه‌م": [{ORTH: "بچه‌", NORM: "بچه‌"}, {ORTH: "م", NORM: "م"}], + "بچه‌هایش": [{ORTH: "بچه‌های", NORM: "بچه‌های"}, {ORTH: "ش", NORM: "ش"}], + "بیانیه‌شان": [{ORTH: "بیانیه‌", NORM: "بیانیه‌"}, {ORTH: "شان", NORM: "شان"}], + "بیدارم": [{ORTH: "بیدار", NORM: "بیدار"}, {ORTH: "م", NORM: "م"}], + "بیناتری": [{ORTH: "بیناتر", NORM: "بیناتر"}, {ORTH: "ی", NORM: "ی"}], + "بی‌اطلاعند": [{ORTH: "بی‌اطلاع", NORM: "بی‌اطلاع"}, {ORTH: "ند", NORM: "ند"}], + "بی‌اطلاعید": [{ORTH: "بی‌اطلاع", NORM: "بی‌اطلاع"}, {ORTH: "ید", NORM: "ید"}], + "بی‌بهره‌اند": [{ORTH: "بی‌بهره‌", NORM: "بی‌بهره‌"}, {ORTH: "اند", NORM: "اند"}], + "بی‌تفاوتند": [{ORTH: "بی‌تفاوت", NORM: "بی‌تفاوت"}, {ORTH: "ند", NORM: "ند"}], + "بی‌حسابش": [{ORTH: "بی‌حساب", NORM: 
"بی‌حساب"}, {ORTH: "ش", NORM: "ش"}], + "بی‌نیش": [{ORTH: "بی‌نی", NORM: "بی‌نی"}, {ORTH: "ش", NORM: "ش"}], + "تجربه‌هایم": [{ORTH: "تجربه‌ها", NORM: "تجربه‌ها"}, {ORTH: "یم", NORM: "یم"}], + "تحریم‌هاست": [{ORTH: "تحریم‌ها", NORM: "تحریم‌ها"}, {ORTH: "ست", NORM: "ست"}], + "تحولند": [{ORTH: "تحول", NORM: "تحول"}, {ORTH: "ند", NORM: "ند"}], + "تخیلی‌اش": [{ORTH: "تخیلی‌", NORM: "تخیلی‌"}, {ORTH: "اش", NORM: "اش"}], + "ترا": [{ORTH: "ت", NORM: "ت"}, {ORTH: "را", NORM: "را"}], + "ترسشان": [{ORTH: "ترس", NORM: "ترس"}, {ORTH: "شان", NORM: "شان"}], + "ترکش": [{ORTH: "ترک", NORM: "ترک"}, {ORTH: "ش", NORM: "ش"}], + "تشنه‌ت": [{ORTH: "تشنه‌", NORM: "تشنه‌"}, {ORTH: "ت", NORM: "ت"}], + "تشکیلاتی‌اش": [{ORTH: "تشکیلاتی‌", NORM: "تشکیلاتی‌"}, {ORTH: "اش", NORM: "اش"}], + "تعلقش": [{ORTH: "تعلق", NORM: "تعلق"}, {ORTH: "ش", NORM: "ش"}], + "تلاششان": [{ORTH: "تلاش", NORM: "تلاش"}, {ORTH: "شان", NORM: "شان"}], + "تلاشمان": [{ORTH: "تلاش", NORM: "تلاش"}, {ORTH: "مان", NORM: "مان"}], + "تماشاگرش": [{ORTH: "تماشاگر", NORM: "تماشاگر"}, {ORTH: "ش", NORM: "ش"}], + "تمامشان": [{ORTH: "تمام", NORM: "تمام"}, {ORTH: "شان", NORM: "شان"}], + "تنش": [{ORTH: "تن", NORM: "تن"}, {ORTH: "ش", NORM: "ش"}], + "تنمان": [{ORTH: "تن", NORM: "تن"}, {ORTH: "مان", NORM: "مان"}], + "تنهایی‌اش": [{ORTH: "تنهایی‌", NORM: "تنهایی‌"}, {ORTH: "اش", NORM: "اش"}], + "توانایی‌اش": [{ORTH: "توانایی‌", NORM: "توانایی‌"}, {ORTH: "اش", NORM: "اش"}], + "توجهش": [{ORTH: "توجه", NORM: "توجه"}, {ORTH: "ش", NORM: "ش"}], + "توست": [{ORTH: "تو", NORM: "تو"}, {ORTH: "ست", NORM: "ست"}], + "توصیه‌اش": [{ORTH: "توصیه‌", NORM: "توصیه‌"}, {ORTH: "اش", NORM: "اش"}], + "تیغه‌اش": [{ORTH: "تیغه‌", NORM: "تیغه‌"}, {ORTH: "اش", NORM: "اش"}], + "جاست": [{ORTH: "جا", NORM: "جا"}, {ORTH: "ست", NORM: "ست"}], + "جامعه‌اند": [{ORTH: "جامعه‌", NORM: "جامعه‌"}, {ORTH: "اند", NORM: "اند"}], + "جانم": [{ORTH: "جان", NORM: "جان"}, {ORTH: "م", NORM: "م"}], + "جایش": [{ORTH: "جای", NORM: "جای"}, {ORTH: "ش", NORM: "ش"}], + "جایشان": [{ORTH: "جای", 
NORM: "جای"}, {ORTH: "شان", NORM: "شان"}], + "جدیدش": [{ORTH: "جدید", NORM: "جدید"}, {ORTH: "ش", NORM: "ش"}], + "جرمزاست": [{ORTH: "جرمزا", NORM: "جرمزا"}, {ORTH: "ست", NORM: "ست"}], + "جلوست": [{ORTH: "جلو", NORM: "جلو"}, {ORTH: "ست", NORM: "ست"}], + "جلویش": [{ORTH: "جلوی", NORM: "جلوی"}, {ORTH: "ش", NORM: "ش"}], + "جمهوریست": [{ORTH: "جمهوری", NORM: "جمهوری"}, {ORTH: "ست", NORM: "ست"}], + "جنسش": [{ORTH: "جنس", NORM: "جنس"}, {ORTH: "ش", NORM: "ش"}], + "جنس‌اند": [{ORTH: "جنس‌", NORM: "جنس‌"}, {ORTH: "اند", NORM: "اند"}], + "جوانانش": [{ORTH: "جوانان", NORM: "جوانان"}, {ORTH: "ش", NORM: "ش"}], + "جویش": [{ORTH: "جوی", NORM: "جوی"}, {ORTH: "ش", NORM: "ش"}], + "جگرش": [{ORTH: "جگر", NORM: "جگر"}, {ORTH: "ش", NORM: "ش"}], + "حاضرم": [{ORTH: "حاضر", NORM: "حاضر"}, {ORTH: "م", NORM: "م"}], + "حالتهایشان": [{ORTH: "حالتهای", NORM: "حالتهای"}, {ORTH: "شان", NORM: "شان"}], + "حالیست": [{ORTH: "حالی", NORM: "حالی"}, {ORTH: "ست", NORM: "ست"}], + "حالی‌مان": [{ORTH: "حالی‌", NORM: "حالی‌"}, {ORTH: "مان", NORM: "مان"}], + "حاکیست": [{ORTH: "حاکی", NORM: "حاکی"}, {ORTH: "ست", NORM: "ست"}], + "حرامزادگی‌اش": [ + {ORTH: "حرامزادگی‌", NORM: "حرامزادگی‌"}, + {ORTH: "اش", NORM: "اش"}, + ], + "حرفتان": [{ORTH: "حرف", NORM: "حرف"}, {ORTH: "تان", NORM: "تان"}], + "حرفش": [{ORTH: "حرف", NORM: "حرف"}, {ORTH: "ش", NORM: "ش"}], + "حرفشان": [{ORTH: "حرف", NORM: "حرف"}, {ORTH: "شان", NORM: "شان"}], + "حرفم": [{ORTH: "حرف", NORM: "حرف"}, {ORTH: "م", NORM: "م"}], + "حرف‌های‌شان": [{ORTH: "حرف‌های‌", NORM: "حرف‌های‌"}, {ORTH: "شان", NORM: "شان"}], + "حرکتمان": [{ORTH: "حرکت", NORM: "حرکت"}, {ORTH: "مان", NORM: "مان"}], + "حریفانشان": [{ORTH: "حریفان", NORM: "حریفان"}, {ORTH: "شان", NORM: "شان"}], + "حضورشان": [{ORTH: "حضور", NORM: "حضور"}, {ORTH: "شان", NORM: "شان"}], + "حمایتش": [{ORTH: "حمایت", NORM: "حمایت"}, {ORTH: "ش", NORM: "ش"}], + "حواسش": [{ORTH: "حواس", NORM: "حواس"}, {ORTH: "ش", NORM: "ش"}], + "حواسشان": [{ORTH: "حواس", NORM: "حواس"}, {ORTH: "شان", NORM: "شان"}], + "حوصله‌مان": 
[{ORTH: "حوصله‌", NORM: "حوصله‌"}, {ORTH: "مان", NORM: "مان"}], + "حکومتش": [{ORTH: "حکومت", NORM: "حکومت"}, {ORTH: "ش", NORM: "ش"}], + "حکومتشان": [{ORTH: "حکومت", NORM: "حکومت"}, {ORTH: "شان", NORM: "شان"}], + "حیفم": [{ORTH: "حیف", NORM: "حیف"}, {ORTH: "م", NORM: "م"}], + "خاندانش": [{ORTH: "خاندان", NORM: "خاندان"}, {ORTH: "ش", NORM: "ش"}], + "خانه‌اش": [{ORTH: "خانه‌", NORM: "خانه‌"}, {ORTH: "اش", NORM: "اش"}], + "خانه‌شان": [{ORTH: "خانه‌", NORM: "خانه‌"}, {ORTH: "شان", NORM: "شان"}], + "خانه‌مان": [{ORTH: "خانه‌", NORM: "خانه‌"}, {ORTH: "مان", NORM: "مان"}], + "خانه‌هایشان": [{ORTH: "خانه‌های", NORM: "خانه‌های"}, {ORTH: "شان", NORM: "شان"}], + "خانواده‌ات": [{ORTH: "خانواده", NORM: "خانواده"}, {ORTH: "‌ات", NORM: "ات"}], + "خانواده‌اش": [{ORTH: "خانواده‌", NORM: "خانواده‌"}, {ORTH: "اش", NORM: "اش"}], + "خانواده‌ام": [{ORTH: "خانواده‌", NORM: "خانواده‌"}, {ORTH: "ام", NORM: "ام"}], + "خانواده‌شان": [{ORTH: "خانواده‌", NORM: "خانواده‌"}, {ORTH: "شان", NORM: "شان"}], + "خداست": [{ORTH: "خدا", NORM: "خدا"}, {ORTH: "ست", NORM: "ست"}], + "خدایش": [{ORTH: "خدا", NORM: "خدا"}, {ORTH: "یش", NORM: "یش"}], + "خدایشان": [{ORTH: "خدای", NORM: "خدای"}, {ORTH: "شان", NORM: "شان"}], + "خردسالش": [{ORTH: "خردسال", NORM: "خردسال"}, {ORTH: "ش", NORM: "ش"}], + "خروپفشان": [{ORTH: "خروپف", NORM: "خروپف"}, {ORTH: "شان", NORM: "شان"}], + "خسته‌ای": [{ORTH: "خسته‌", NORM: "خسته‌"}, {ORTH: "ای", NORM: "ای"}], + "خطت": [{ORTH: "خط", NORM: "خط"}, {ORTH: "ت", NORM: "ت"}], + "خوابمان": [{ORTH: "خواب", NORM: "خواب"}, {ORTH: "مان", NORM: "مان"}], + "خواندنش": [{ORTH: "خواندن", NORM: "خواندن"}, {ORTH: "ش", NORM: "ش"}], + "خواهرش": [{ORTH: "خواهر", NORM: "خواهر"}, {ORTH: "ش", NORM: "ش"}], + "خوبش": [{ORTH: "خوب", NORM: "خوب"}, {ORTH: "ش", NORM: "ش"}], + "خودت": [{ORTH: "خود", NORM: "خود"}, {ORTH: "ت", NORM: "ت"}], + "خودتان": [{ORTH: "خود", NORM: "خود"}, {ORTH: "تان", NORM: "تان"}], + "خودش": [{ORTH: "خود", NORM: "خود"}, {ORTH: "ش", NORM: "ش"}], + "خودشان": [{ORTH: "خود", NORM: "خود"}, 
{ORTH: "شان", NORM: "شان"}], + "خودمان": [{ORTH: "خود", NORM: "خود"}, {ORTH: "مان", NORM: "مان"}], + "خوردمان": [{ORTH: "خورد", NORM: "خورد"}, {ORTH: "مان", NORM: "مان"}], + "خوردنشان": [{ORTH: "خوردن", NORM: "خوردن"}, {ORTH: "شان", NORM: "شان"}], + "خوشش": [{ORTH: "خوش", NORM: "خوش"}, {ORTH: "ش", NORM: "ش"}], + "خوشوقتم": [{ORTH: "خوشوقت", NORM: "خوشوقت"}, {ORTH: "م", NORM: "م"}], + "خونشان": [{ORTH: "خون", NORM: "خون"}, {ORTH: "شان", NORM: "شان"}], + "خویش": [{ORTH: "خوی", NORM: "خوی"}, {ORTH: "ش", NORM: "ش"}], + "خویشتنم": [{ORTH: "خویشتن", NORM: "خویشتن"}, {ORTH: "م", NORM: "م"}], + "خیالش": [{ORTH: "خیال", NORM: "خیال"}, {ORTH: "ش", NORM: "ش"}], + "خیسش": [{ORTH: "خیس", NORM: "خیس"}, {ORTH: "ش", NORM: "ش"}], + "داراست": [{ORTH: "دارا", NORM: "دارا"}, {ORTH: "ست", NORM: "ست"}], + "داستانهایش": [{ORTH: "داستانهای", NORM: "داستانهای"}, {ORTH: "ش", NORM: "ش"}], + "دخترمان": [{ORTH: "دختر", NORM: "دختر"}, {ORTH: "مان", NORM: "مان"}], + "دخیلند": [{ORTH: "دخیل", NORM: "دخیل"}, {ORTH: "ند", NORM: "ند"}], + "درباره‌ات": [{ORTH: "درباره", NORM: "درباره"}, {ORTH: "‌ات", NORM: "ات"}], + "درباره‌اش": [{ORTH: "درباره‌", NORM: "درباره‌"}, {ORTH: "اش", NORM: "اش"}], + "دردش": [{ORTH: "درد", NORM: "درد"}, {ORTH: "ش", NORM: "ش"}], + "دردشان": [{ORTH: "درد", NORM: "درد"}, {ORTH: "شان", NORM: "شان"}], + "درسته": [{ORTH: "درست", NORM: "درست"}, {ORTH: "ه", NORM: "ه"}], + "درش": [{ORTH: "در", NORM: "در"}, {ORTH: "ش", NORM: "ش"}], + "درون‌شان": [{ORTH: "درون‌", NORM: "درون‌"}, {ORTH: "شان", NORM: "شان"}], + "درین": [{ORTH: "در", NORM: "در"}, {ORTH: "ین", NORM: "ین"}], + "دریچه‌هایش": [{ORTH: "دریچه‌های", NORM: "دریچه‌های"}, {ORTH: "ش", NORM: "ش"}], + "دزدانش": [{ORTH: "دزدان", NORM: "دزدان"}, {ORTH: "ش", NORM: "ش"}], + "دستت": [{ORTH: "دست", NORM: "دست"}, {ORTH: "ت", NORM: "ت"}], + "دستش": [{ORTH: "دست", NORM: "دست"}, {ORTH: "ش", NORM: "ش"}], + "دستمان": [{ORTH: "دست", NORM: "دست"}, {ORTH: "مان", NORM: "مان"}], + "دستهایشان": [{ORTH: "دستهای", NORM: "دستهای"}, {ORTH: "شان", NORM: 
"شان"}], + "دست‌یافتنی‌ست": [ + {ORTH: "دست‌یافتنی‌", NORM: "دست‌یافتنی‌"}, + {ORTH: "ست", NORM: "ست"}, + ], + "دشمنند": [{ORTH: "دشمن", NORM: "دشمن"}, {ORTH: "ند", NORM: "ند"}], + "دشمنیشان": [{ORTH: "دشمنی", NORM: "دشمنی"}, {ORTH: "شان", NORM: "شان"}], + "دشمنیم": [{ORTH: "دشمن", NORM: "دشمن"}, {ORTH: "یم", NORM: "یم"}], + "دفترش": [{ORTH: "دفتر", NORM: "دفتر"}, {ORTH: "ش", NORM: "ش"}], + "دفنشان": [{ORTH: "دفن", NORM: "دفن"}, {ORTH: "شان", NORM: "شان"}], + "دلت": [{ORTH: "دل", NORM: "دل"}, {ORTH: "ت", NORM: "ت"}], + "دلش": [{ORTH: "دل", NORM: "دل"}, {ORTH: "ش", NORM: "ش"}], + "دلشان": [{ORTH: "دل", NORM: "دل"}, {ORTH: "شان", NORM: "شان"}], + "دلم": [{ORTH: "دل", NORM: "دل"}, {ORTH: "م", NORM: "م"}], + "دلیلش": [{ORTH: "دلیل", NORM: "دلیل"}, {ORTH: "ش", NORM: "ش"}], + "دنبالش": [{ORTH: "دنبال", NORM: "دنبال"}, {ORTH: "ش", NORM: "ش"}], + "دنباله‌اش": [{ORTH: "دنباله‌", NORM: "دنباله‌"}, {ORTH: "اش", NORM: "اش"}], + "دهاتی‌هایش": [{ORTH: "دهاتی‌های", NORM: "دهاتی‌های"}, {ORTH: "ش", NORM: "ش"}], + "دهانت": [{ORTH: "دهان", NORM: "دهان"}, {ORTH: "ت", NORM: "ت"}], + "دهنش": [{ORTH: "دهن", NORM: "دهن"}, {ORTH: "ش", NORM: "ش"}], + "دورش": [{ORTH: "دور", NORM: "دور"}, {ORTH: "ش", NORM: "ش"}], + "دوروبریهاشان": [ + {ORTH: "دوروبریها", NORM: "دوروبریها"}, + {ORTH: "شان", NORM: "شان"}, + ], + "دوستانش": [{ORTH: "دوستان", NORM: "دوستان"}, {ORTH: "ش", NORM: "ش"}], + "دوستانشان": [{ORTH: "دوستان", NORM: "دوستان"}, {ORTH: "شان", NORM: "شان"}], + "دوستت": [{ORTH: "دوست", NORM: "دوست"}, {ORTH: "ت", NORM: "ت"}], + "دوستش": [{ORTH: "دوست", NORM: "دوست"}, {ORTH: "ش", NORM: "ش"}], + "دومش": [{ORTH: "دوم", NORM: "دوم"}, {ORTH: "ش", NORM: "ش"}], + "دویدنش": [{ORTH: "دویدن", NORM: "دویدن"}, {ORTH: "ش", NORM: "ش"}], + "دکورهایمان": [{ORTH: "دکورهای", NORM: "دکورهای"}, {ORTH: "مان", NORM: "مان"}], + "دیدگاهش": [{ORTH: "دیدگاه", NORM: "دیدگاه"}, {ORTH: "ش", NORM: "ش"}], + "دیرت": [{ORTH: "دیر", NORM: "دیر"}, {ORTH: "ت", NORM: "ت"}], + "دیرم": [{ORTH: "دیر", NORM: "دیر"}, {ORTH: "م", NORM: 
"م"}], + "دینت": [{ORTH: "دین", NORM: "دین"}, {ORTH: "ت", NORM: "ت"}], + "دینش": [{ORTH: "دین", NORM: "دین"}, {ORTH: "ش", NORM: "ش"}], + "دین‌شان": [{ORTH: "دین‌", NORM: "دین‌"}, {ORTH: "شان", NORM: "شان"}], + "دیواره‌هایش": [{ORTH: "دیواره‌های", NORM: "دیواره‌های"}, {ORTH: "ش", NORM: "ش"}], + "دیوانه‌ای": [{ORTH: "دیوانه‌", NORM: "دیوانه‌"}, {ORTH: "ای", NORM: "ای"}], + "دیوی": [{ORTH: "دیو", NORM: "دیو"}, {ORTH: "ی", NORM: "ی"}], + "دیگرم": [{ORTH: "دیگر", NORM: "دیگر"}, {ORTH: "م", NORM: "م"}], + "دیگرمان": [{ORTH: "دیگر", NORM: "دیگر"}, {ORTH: "مان", NORM: "مان"}], + "ذهنش": [{ORTH: "ذهن", NORM: "ذهن"}, {ORTH: "ش", NORM: "ش"}], + "ذهنشان": [{ORTH: "ذهن", NORM: "ذهن"}, {ORTH: "شان", NORM: "شان"}], + "ذهنم": [{ORTH: "ذهن", NORM: "ذهن"}, {ORTH: "م", NORM: "م"}], + "رئوسش": [{ORTH: "رئوس", NORM: "رئوس"}, {ORTH: "ش", NORM: "ش"}], + "راهشان": [{ORTH: "راه", NORM: "راه"}, {ORTH: "شان", NORM: "شان"}], + "راهگشاست": [{ORTH: "راهگشا", NORM: "راهگشا"}, {ORTH: "ست", NORM: "ست"}], + "رایانه‌هایشان": [ + {ORTH: "رایانه‌های", NORM: "رایانه‌های"}, + {ORTH: "شان", NORM: "شان"}, + ], + "رعایتشان": [{ORTH: "رعایت", NORM: "رعایت"}, {ORTH: "شان", NORM: "شان"}], + "رفتارش": [{ORTH: "رفتار", NORM: "رفتار"}, {ORTH: "ش", NORM: "ش"}], + "رفتارشان": [{ORTH: "رفتار", NORM: "رفتار"}, {ORTH: "شان", NORM: "شان"}], + "رفتارمان": [{ORTH: "رفتار", NORM: "رفتار"}, {ORTH: "مان", NORM: "مان"}], + "رفتارهاست": [{ORTH: "رفتارها", NORM: "رفتارها"}, {ORTH: "ست", NORM: "ست"}], + "رفتارهایشان": [{ORTH: "رفتارهای", NORM: "رفتارهای"}, {ORTH: "شان", NORM: "شان"}], + "رفقایم": [{ORTH: "رفقا", NORM: "رفقا"}, {ORTH: "یم", NORM: "یم"}], + "رقیق‌ترش": [{ORTH: "رقیق‌تر", NORM: "رقیق‌تر"}, {ORTH: "ش", NORM: "ش"}], + "رنجند": [{ORTH: "رنج", NORM: "رنج"}, {ORTH: "ند", NORM: "ند"}], + "رهگشاست": [{ORTH: "رهگشا", NORM: "رهگشا"}, {ORTH: "ست", NORM: "ست"}], + "رواست": [{ORTH: "روا", NORM: "روا"}, {ORTH: "ست", NORM: "ست"}], + "روبروست": [{ORTH: "روبرو", NORM: "روبرو"}, {ORTH: "ست", NORM: "ست"}], + "روحی‌اش": [{ORTH: 
"روحی‌", NORM: "روحی‌"}, {ORTH: "اش", NORM: "اش"}], + "روزنامه‌اش": [{ORTH: "روزنامه‌", NORM: "روزنامه‌"}, {ORTH: "اش", NORM: "اش"}], + "روزه‌ست": [{ORTH: "روزه‌", NORM: "روزه‌"}, {ORTH: "ست", NORM: "ست"}], + "روسری‌اش": [{ORTH: "روسری‌", NORM: "روسری‌"}, {ORTH: "اش", NORM: "اش"}], + "روشتان": [{ORTH: "روش", NORM: "روش"}, {ORTH: "تان", NORM: "تان"}], + "رویش": [{ORTH: "روی", NORM: "روی"}, {ORTH: "ش", NORM: "ش"}], + "زبانش": [{ORTH: "زبان", NORM: "زبان"}, {ORTH: "ش", NORM: "ش"}], + "زحماتشان": [{ORTH: "زحمات", NORM: "زحمات"}, {ORTH: "شان", NORM: "شان"}], + "زدنهایشان": [{ORTH: "زدنهای", NORM: "زدنهای"}, {ORTH: "شان", NORM: "شان"}], + "زرنگشان": [{ORTH: "زرنگ", NORM: "زرنگ"}, {ORTH: "شان", NORM: "شان"}], + "زشتش": [{ORTH: "زشت", NORM: "زشت"}, {ORTH: "ش", NORM: "ش"}], + "زشتکارانند": [{ORTH: "زشتکاران", NORM: "زشتکاران"}, {ORTH: "ند", NORM: "ند"}], + "زلفش": [{ORTH: "زلف", NORM: "زلف"}, {ORTH: "ش", NORM: "ش"}], + "زمن": [{ORTH: "ز", NORM: "ز"}, {ORTH: "من", NORM: "من"}], + "زنبوری‌اش": [{ORTH: "زنبوری‌", NORM: "زنبوری‌"}, {ORTH: "اش", NORM: "اش"}], + "زندانم": [{ORTH: "زندان", NORM: "زندان"}, {ORTH: "م", NORM: "م"}], + "زنده‌ام": [{ORTH: "زنده‌", NORM: "زنده‌"}, {ORTH: "ام", NORM: "ام"}], + "زندگانی‌اش": [{ORTH: "زندگانی‌", NORM: "زندگانی‌"}, {ORTH: "اش", NORM: "اش"}], + "زندگی‌اش": [{ORTH: "زندگی‌", NORM: "زندگی‌"}, {ORTH: "اش", NORM: "اش"}], + "زندگی‌ام": [{ORTH: "زندگی‌", NORM: "زندگی‌"}, {ORTH: "ام", NORM: "ام"}], + "زندگی‌شان": [{ORTH: "زندگی‌", NORM: "زندگی‌"}, {ORTH: "شان", NORM: "شان"}], + "زنش": [{ORTH: "زن", NORM: "زن"}, {ORTH: "ش", NORM: "ش"}], + "زنند": [{ORTH: "زن", NORM: "زن"}, {ORTH: "ند", NORM: "ند"}], + "زو": [{ORTH: "ز", NORM: "ز"}, {ORTH: "و", NORM: "و"}], + "زیاده": [{ORTH: "زیاد", NORM: "زیاد"}, {ORTH: "ه", NORM: "ه"}], + "زیباست": [{ORTH: "زیبا", NORM: "زیبا"}, {ORTH: "ست", NORM: "ست"}], + "زیبایش": [{ORTH: "زیبای", NORM: "زیبای"}, {ORTH: "ش", NORM: "ش"}], + "زیبایی": [{ORTH: "زیبای", NORM: "زیبای"}, {ORTH: "ی", NORM: "ی"}], + "زیربناست": [{ORTH: 
"زیربنا", NORM: "زیربنا"}, {ORTH: "ست", NORM: "ست"}], + "زیرک‌اند": [{ORTH: "زیرک‌", NORM: "زیرک‌"}, {ORTH: "اند", NORM: "اند"}], + "سؤالتان": [{ORTH: "سؤال", NORM: "سؤال"}, {ORTH: "تان", NORM: "تان"}], + "سؤالم": [{ORTH: "سؤال", NORM: "سؤال"}, {ORTH: "م", NORM: "م"}], + "سابقه‌اش": [{ORTH: "سابقه‌", NORM: "سابقه‌"}, {ORTH: "اش", NORM: "اش"}], + "ساختنم": [{ORTH: "ساختن", NORM: "ساختن"}, {ORTH: "م", NORM: "م"}], + "ساده‌اش": [{ORTH: "ساده‌", NORM: "ساده‌"}, {ORTH: "اش", NORM: "اش"}], + "ساده‌اند": [{ORTH: "ساده‌", NORM: "ساده‌"}, {ORTH: "اند", NORM: "اند"}], + "سازمانش": [{ORTH: "سازمان", NORM: "سازمان"}, {ORTH: "ش", NORM: "ش"}], + "ساعتم": [{ORTH: "ساعت", NORM: "ساعت"}, {ORTH: "م", NORM: "م"}], + "سالته": [ + {ORTH: "سال", NORM: "سال"}, + {ORTH: "ت", NORM: "ت"}, + {ORTH: "ه", NORM: "ه"}, + ], + "سالش": [{ORTH: "سال", NORM: "سال"}, {ORTH: "ش", NORM: "ش"}], + "سالهاست": [{ORTH: "سالها", NORM: "سالها"}, {ORTH: "ست", NORM: "ست"}], + "ساله‌اش": [{ORTH: "ساله‌", NORM: "ساله‌"}, {ORTH: "اش", NORM: "اش"}], + "ساکتند": [{ORTH: "ساکت", NORM: "ساکت"}, {ORTH: "ند", NORM: "ند"}], + "ساکنند": [{ORTH: "ساکن", NORM: "ساکن"}, {ORTH: "ند", NORM: "ند"}], + "سبزشان": [{ORTH: "سبز", NORM: "سبز"}, {ORTH: "شان", NORM: "شان"}], + "سبیل‌مان": [{ORTH: "سبیل‌", NORM: "سبیل‌"}, {ORTH: "مان", NORM: "مان"}], + "ستم‌هایش": [{ORTH: "ستم‌های", NORM: "ستم‌های"}, {ORTH: "ش", NORM: "ش"}], + "سخنانش": [{ORTH: "سخنان", NORM: "سخنان"}, {ORTH: "ش", NORM: "ش"}], + "سخنانشان": [{ORTH: "سخنان", NORM: "سخنان"}, {ORTH: "شان", NORM: "شان"}], + "سخنتان": [{ORTH: "سخن", NORM: "سخن"}, {ORTH: "تان", NORM: "تان"}], + "سخنش": [{ORTH: "سخن", NORM: "سخن"}, {ORTH: "ش", NORM: "ش"}], + "سخنم": [{ORTH: "سخن", NORM: "سخن"}, {ORTH: "م", NORM: "م"}], + "سردش": [{ORTH: "سرد", NORM: "سرد"}, {ORTH: "ش", NORM: "ش"}], + "سرزمینشان": [{ORTH: "سرزمین", NORM: "سرزمین"}, {ORTH: "شان", NORM: "شان"}], + "سرش": [{ORTH: "سر", NORM: "سر"}, {ORTH: "ش", NORM: "ش"}], + "سرمایه‌دارهاست": [ + {ORTH: "سرمایه‌دارها", NORM: "سرمایه‌دارها"}, + 
{ORTH: "ست", NORM: "ست"}, + ], + "سرنوشتش": [{ORTH: "سرنوشت", NORM: "سرنوشت"}, {ORTH: "ش", NORM: "ش"}], + "سرنوشتشان": [{ORTH: "سرنوشت", NORM: "سرنوشت"}, {ORTH: "شان", NORM: "شان"}], + "سروتهش": [{ORTH: "سروته", NORM: "سروته"}, {ORTH: "ش", NORM: "ش"}], + "سرچشمه‌اش": [{ORTH: "سرچشمه‌", NORM: "سرچشمه‌"}, {ORTH: "اش", NORM: "اش"}], + "سقمش": [{ORTH: "سقم", NORM: "سقم"}, {ORTH: "ش", NORM: "ش"}], + "سنش": [{ORTH: "سن", NORM: "سن"}, {ORTH: "ش", NORM: "ش"}], + "سپاهش": [{ORTH: "سپاه", NORM: "سپاه"}, {ORTH: "ش", NORM: "ش"}], + "سیاسیشان": [{ORTH: "سیاسی", NORM: "سیاسی"}, {ORTH: "شان", NORM: "شان"}], + "سیاه‌چاله‌هاست": [ + {ORTH: "سیاه‌چاله‌ها", NORM: "سیاه‌چاله‌ها"}, + {ORTH: "ست", NORM: "ست"}, + ], + "شاخه‌هایشان": [{ORTH: "شاخه‌های", NORM: "شاخه‌های"}, {ORTH: "شان", NORM: "شان"}], + "شالوده‌اش": [{ORTH: "شالوده‌", NORM: "شالوده‌"}, {ORTH: "اش", NORM: "اش"}], + "شانه‌هایش": [{ORTH: "شانه‌های", NORM: "شانه‌های"}, {ORTH: "ش", NORM: "ش"}], + "شاهدیم": [{ORTH: "شاهد", NORM: "شاهد"}, {ORTH: "یم", NORM: "یم"}], + "شاهکارهایش": [{ORTH: "شاهکارهای", NORM: "شاهکارهای"}, {ORTH: "ش", NORM: "ش"}], + "شخصیتش": [{ORTH: "شخصیت", NORM: "شخصیت"}, {ORTH: "ش", NORM: "ش"}], + "شدنشان": [{ORTH: "شدن", NORM: "شدن"}, {ORTH: "شان", NORM: "شان"}], + "شرکتیست": [{ORTH: "شرکتی", NORM: "شرکتی"}, {ORTH: "ست", NORM: "ست"}], + "شعارهاشان": [{ORTH: "شعارها", NORM: "شعارها"}, {ORTH: "شان", NORM: "شان"}], + "شعورش": [{ORTH: "شعور", NORM: "شعور"}, {ORTH: "ش", NORM: "ش"}], + "شغلش": [{ORTH: "شغل", NORM: "شغل"}, {ORTH: "ش", NORM: "ش"}], + "شماست": [{ORTH: "شما", NORM: "شما"}, {ORTH: "ست", NORM: "ست"}], + "شمشیرش": [{ORTH: "شمشیر", NORM: "شمشیر"}, {ORTH: "ش", NORM: "ش"}], + "شنیدنش": [{ORTH: "شنیدن", NORM: "شنیدن"}, {ORTH: "ش", NORM: "ش"}], + "شوراست": [{ORTH: "شورا", NORM: "شورا"}, {ORTH: "ست", NORM: "ست"}], + "شومت": [{ORTH: "شوم", NORM: "شوم"}, {ORTH: "ت", NORM: "ت"}], + "شیرینترش": [{ORTH: "شیرینتر", NORM: "شیرینتر"}, {ORTH: "ش", NORM: "ش"}], + "شیطان‌اند": [{ORTH: "شیطان‌", NORM: "شیطان‌"}, {ORTH: 
"اند", NORM: "اند"}], + "شیوه‌هاست": [{ORTH: "شیوه‌ها", NORM: "شیوه‌ها"}, {ORTH: "ست", NORM: "ست"}], + "صاحبش": [{ORTH: "صاحب", NORM: "صاحب"}, {ORTH: "ش", NORM: "ش"}], + "صحنه‌اش": [{ORTH: "صحنه‌", NORM: "صحنه‌"}, {ORTH: "اش", NORM: "اش"}], + "صدایش": [{ORTH: "صدای", NORM: "صدای"}, {ORTH: "ش", NORM: "ش"}], + "صددند": [{ORTH: "صدد", NORM: "صدد"}, {ORTH: "ند", NORM: "ند"}], + "صندوق‌هاست": [{ORTH: "صندوق‌ها", NORM: "صندوق‌ها"}, {ORTH: "ست", NORM: "ست"}], + "صندوق‌هایش": [{ORTH: "صندوق‌های", NORM: "صندوق‌های"}, {ORTH: "ش", NORM: "ش"}], + "صورتش": [{ORTH: "صورت", NORM: "صورت"}, {ORTH: "ش", NORM: "ش"}], + "ضروری‌اند": [{ORTH: "ضروری‌", NORM: "ضروری‌"}, {ORTH: "اند", NORM: "اند"}], + "ضمیرش": [{ORTH: "ضمیر", NORM: "ضمیر"}, {ORTH: "ش", NORM: "ش"}], + "طرفش": [{ORTH: "طرف", NORM: "طرف"}, {ORTH: "ش", NORM: "ش"}], + "طلسمش": [{ORTH: "طلسم", NORM: "طلسم"}, {ORTH: "ش", NORM: "ش"}], + "طوره": [{ORTH: "طور", NORM: "طور"}, {ORTH: "ه", NORM: "ه"}], + "عاشوراست": [{ORTH: "عاشورا", NORM: "عاشورا"}, {ORTH: "ست", NORM: "ست"}], + "عبارتند": [{ORTH: "عبارت", NORM: "عبارت"}, {ORTH: "ند", NORM: "ند"}], + "عزیزانتان": [{ORTH: "عزیزان", NORM: "عزیزان"}, {ORTH: "تان", NORM: "تان"}], + "عزیزانش": [{ORTH: "عزیزان", NORM: "عزیزان"}, {ORTH: "ش", NORM: "ش"}], + "عزیزش": [{ORTH: "عزیز", NORM: "عزیز"}, {ORTH: "ش", NORM: "ش"}], + "عشرت‌طلبی‌اش": [ + {ORTH: "عشرت‌طلبی‌", NORM: "عشرت‌طلبی‌"}, + {ORTH: "اش", NORM: "اش"}, + ], + "عقبیم": [{ORTH: "عقب", NORM: "عقب"}, {ORTH: "یم", NORM: "یم"}], + "علاقه‌اش": [{ORTH: "علاقه‌", NORM: "علاقه‌"}, {ORTH: "اش", NORM: "اش"}], + "علمیمان": [{ORTH: "علمی", NORM: "علمی"}, {ORTH: "مان", NORM: "مان"}], + "عمرش": [{ORTH: "عمر", NORM: "عمر"}, {ORTH: "ش", NORM: "ش"}], + "عمرشان": [{ORTH: "عمر", NORM: "عمر"}, {ORTH: "شان", NORM: "شان"}], + "عملش": [{ORTH: "عمل", NORM: "عمل"}, {ORTH: "ش", NORM: "ش"}], + "عملی‌اند": [{ORTH: "عملی‌", NORM: "عملی‌"}, {ORTH: "اند", NORM: "اند"}], + "عمویت": [{ORTH: "عموی", NORM: "عموی"}, {ORTH: "ت", NORM: "ت"}], + "عمویش": [{ORTH: "عموی", 
NORM: "عموی"}, {ORTH: "ش", NORM: "ش"}], + "عمیقش": [{ORTH: "عمیق", NORM: "عمیق"}, {ORTH: "ش", NORM: "ش"}], + "عواملش": [{ORTH: "عوامل", NORM: "عوامل"}, {ORTH: "ش", NORM: "ش"}], + "عوضشان": [{ORTH: "عوض", NORM: "عوض"}, {ORTH: "شان", NORM: "شان"}], + "غذایی‌شان": [{ORTH: "غذایی‌", NORM: "غذایی‌"}, {ORTH: "شان", NORM: "شان"}], + "غریبه‌اند": [{ORTH: "غریبه‌", NORM: "غریبه‌"}, {ORTH: "اند", NORM: "اند"}], + "غلامانش": [{ORTH: "غلامان", NORM: "غلامان"}, {ORTH: "ش", NORM: "ش"}], + "غلطهاست": [{ORTH: "غلطها", NORM: "غلطها"}, {ORTH: "ست", NORM: "ست"}], + "فراموشتان": [{ORTH: "فراموش", NORM: "فراموش"}, {ORTH: "تان", NORM: "تان"}], + "فردی‌اند": [{ORTH: "فردی‌", NORM: "فردی‌"}, {ORTH: "اند", NORM: "اند"}], + "فرزندانش": [{ORTH: "فرزندان", NORM: "فرزندان"}, {ORTH: "ش", NORM: "ش"}], + "فرزندش": [{ORTH: "فرزند", NORM: "فرزند"}, {ORTH: "ش", NORM: "ش"}], + "فرم‌هایش": [{ORTH: "فرم‌های", NORM: "فرم‌های"}, {ORTH: "ش", NORM: "ش"}], + "فرهنگی‌مان": [{ORTH: "فرهنگی‌", NORM: "فرهنگی‌"}, {ORTH: "مان", NORM: "مان"}], + "فریادشان": [{ORTH: "فریاد", NORM: "فریاد"}, {ORTH: "شان", NORM: "شان"}], + "فضایی‌شان": [{ORTH: "فضایی‌", NORM: "فضایی‌"}, {ORTH: "شان", NORM: "شان"}], + "فقیرشان": [{ORTH: "فقیر", NORM: "فقیر"}, {ORTH: "شان", NORM: "شان"}], + "فوری‌شان": [{ORTH: "فوری‌", NORM: "فوری‌"}, {ORTH: "شان", NORM: "شان"}], + "قائلند": [{ORTH: "قائل", NORM: "قائل"}, {ORTH: "ند", NORM: "ند"}], + "قائلیم": [{ORTH: "قائل", NORM: "قائل"}, {ORTH: "یم", NORM: "یم"}], + "قادرند": [{ORTH: "قادر", NORM: "قادر"}, {ORTH: "ند", NORM: "ند"}], + "قانونمندش": [{ORTH: "قانونمند", NORM: "قانونمند"}, {ORTH: "ش", NORM: "ش"}], + "قبلند": [{ORTH: "قبل", NORM: "قبل"}, {ORTH: "ند", NORM: "ند"}], + "قبلی‌اش": [{ORTH: "قبلی‌", NORM: "قبلی‌"}, {ORTH: "اش", NORM: "اش"}], + "قبلی‌مان": [{ORTH: "قبلی‌", NORM: "قبلی‌"}, {ORTH: "مان", NORM: "مان"}], + "قدریست": [{ORTH: "قدری", NORM: "قدری"}, {ORTH: "ست", NORM: "ست"}], + "قدمش": [{ORTH: "قدم", NORM: "قدم"}, {ORTH: "ش", NORM: "ش"}], + "قسمتش": [{ORTH: "قسمت", NORM: "قسمت"}, 
{ORTH: "ش", NORM: "ش"}], + "قضایاست": [{ORTH: "قضایا", NORM: "قضایا"}, {ORTH: "ست", NORM: "ست"}], + "قضیه‌شان": [{ORTH: "قضیه‌", NORM: "قضیه‌"}, {ORTH: "شان", NORM: "شان"}], + "قهرمانهایشان": [ + {ORTH: "قهرمانهای", NORM: "قهرمانهای"}, + {ORTH: "شان", NORM: "شان"}, + ], + "قهرمانیش": [{ORTH: "قهرمانی", NORM: "قهرمانی"}, {ORTH: "ش", NORM: "ش"}], + "قومت": [{ORTH: "قوم", NORM: "قوم"}, {ORTH: "ت", NORM: "ت"}], + "لازمه‌اش": [{ORTH: "لازمه‌", NORM: "لازمه‌"}, {ORTH: "اش", NORM: "اش"}], + "مأموریتش": [{ORTH: "مأموریت", NORM: "مأموریت"}, {ORTH: "ش", NORM: "ش"}], + "مأموریتم": [{ORTH: "مأموریت", NORM: "مأموریت"}, {ORTH: "م", NORM: "م"}], + "مأموریت‌اند": [{ORTH: "مأموریت‌", NORM: "مأموریت‌"}, {ORTH: "اند", NORM: "اند"}], + "مادرانشان": [{ORTH: "مادران", NORM: "مادران"}, {ORTH: "شان", NORM: "شان"}], + "مادرت": [{ORTH: "مادر", NORM: "مادر"}, {ORTH: "ت", NORM: "ت"}], + "مادرش": [{ORTH: "مادر", NORM: "مادر"}, {ORTH: "ش", NORM: "ش"}], + "مادرم": [{ORTH: "مادر", NORM: "مادر"}, {ORTH: "م", NORM: "م"}], + "ماست": [{ORTH: "ما", NORM: "ما"}, {ORTH: "ست", NORM: "ست"}], + "مالی‌اش": [{ORTH: "مالی‌", NORM: "مالی‌"}, {ORTH: "اش", NORM: "اش"}], + "ماهیتش": [{ORTH: "ماهیت", NORM: "ماهیت"}, {ORTH: "ش", NORM: "ش"}], + "مایی": [{ORTH: "ما", NORM: "ما"}, {ORTH: "یی", NORM: "یی"}], + "مجازاتش": [{ORTH: "مجازات", NORM: "مجازات"}, {ORTH: "ش", NORM: "ش"}], + "مجبورند": [{ORTH: "مجبور", NORM: "مجبور"}, {ORTH: "ند", NORM: "ند"}], + "محتاجند": [{ORTH: "محتاج", NORM: "محتاج"}, {ORTH: "ند", NORM: "ند"}], + "محرمم": [{ORTH: "محرم", NORM: "محرم"}, {ORTH: "م", NORM: "م"}], + "محلش": [{ORTH: "محل", NORM: "محل"}, {ORTH: "ش", NORM: "ش"}], + "مخالفند": [{ORTH: "مخالف", NORM: "مخالف"}, {ORTH: "ند", NORM: "ند"}], + "مخدرش": [{ORTH: "مخدر", NORM: "مخدر"}, {ORTH: "ش", NORM: "ش"}], + "مدتهاست": [{ORTH: "مدتها", NORM: "مدتها"}, {ORTH: "ست", NORM: "ست"}], + "مدرسه‌ات": [{ORTH: "مدرسه", NORM: "مدرسه"}, {ORTH: "‌ات", NORM: "ات"}], + "مدرکم": [{ORTH: "مدرک", NORM: "مدرک"}, {ORTH: "م", NORM: "م"}], + "مدیرانش": 
[{ORTH: "مدیران", NORM: "مدیران"}, {ORTH: "ش", NORM: "ش"}], + "مدیونم": [{ORTH: "مدیون", NORM: "مدیون"}, {ORTH: "م", NORM: "م"}], + "مذهبی‌اند": [{ORTH: "مذهبی‌", NORM: "مذهبی‌"}, {ORTH: "اند", NORM: "اند"}], + "مرا": [{ORTH: "م", NORM: "م"}, {ORTH: "را", NORM: "را"}], + "مرادت": [{ORTH: "مراد", NORM: "مراد"}, {ORTH: "ت", NORM: "ت"}], + "مردمشان": [{ORTH: "مردم", NORM: "مردم"}, {ORTH: "شان", NORM: "شان"}], + "مردمند": [{ORTH: "مردم", NORM: "مردم"}, {ORTH: "ند", NORM: "ند"}], + "مردم‌اند": [{ORTH: "مردم‌", NORM: "مردم‌"}, {ORTH: "اند", NORM: "اند"}], + "مرزشان": [{ORTH: "مرز", NORM: "مرز"}, {ORTH: "شان", NORM: "شان"}], + "مرزهاشان": [{ORTH: "مرزها", NORM: "مرزها"}, {ORTH: "شان", NORM: "شان"}], + "مزدورش": [{ORTH: "مزدور", NORM: "مزدور"}, {ORTH: "ش", NORM: "ش"}], + "مسئولیتش": [{ORTH: "مسئولیت", NORM: "مسئولیت"}, {ORTH: "ش", NORM: "ش"}], + "مسائلش": [{ORTH: "مسائل", NORM: "مسائل"}, {ORTH: "ش", NORM: "ش"}], + "مستحضرید": [{ORTH: "مستحضر", NORM: "مستحضر"}, {ORTH: "ید", NORM: "ید"}], + "مسلمانم": [{ORTH: "مسلمان", NORM: "مسلمان"}, {ORTH: "م", NORM: "م"}], + "مسلمانند": [{ORTH: "مسلمان", NORM: "مسلمان"}, {ORTH: "ند", NORM: "ند"}], + "مشتریانش": [{ORTH: "مشتریان", NORM: "مشتریان"}, {ORTH: "ش", NORM: "ش"}], + "مشتهایمان": [{ORTH: "مشتهای", NORM: "مشتهای"}, {ORTH: "مان", NORM: "مان"}], + "مشخصند": [{ORTH: "مشخص", NORM: "مشخص"}, {ORTH: "ند", NORM: "ند"}], + "مشغولند": [{ORTH: "مشغول", NORM: "مشغول"}, {ORTH: "ند", NORM: "ند"}], + "مشغولیم": [{ORTH: "مشغول", NORM: "مشغول"}, {ORTH: "یم", NORM: "یم"}], + "مشهورش": [{ORTH: "مشهور", NORM: "مشهور"}, {ORTH: "ش", NORM: "ش"}], + "مشکلاتشان": [{ORTH: "مشکلات", NORM: "مشکلات"}, {ORTH: "شان", NORM: "شان"}], + "مشکلم": [{ORTH: "مشکل", NORM: "مشکل"}, {ORTH: "م", NORM: "م"}], + "مطمئنم": [{ORTH: "مطمئن", NORM: "مطمئن"}, {ORTH: "م", NORM: "م"}], + "معامله‌مان": [{ORTH: "معامله‌", NORM: "معامله‌"}, {ORTH: "مان", NORM: "مان"}], + "معتقدم": [{ORTH: "معتقد", NORM: "معتقد"}, {ORTH: "م", NORM: "م"}], + "معتقدند": [{ORTH: "معتقد", NORM: "معتقد"}, 
{ORTH: "ند", NORM: "ند"}], + "معتقدیم": [{ORTH: "معتقد", NORM: "معتقد"}, {ORTH: "یم", NORM: "یم"}], + "معرفی‌اش": [{ORTH: "معرفی‌", NORM: "معرفی‌"}, {ORTH: "اش", NORM: "اش"}], + "معروفش": [{ORTH: "معروف", NORM: "معروف"}, {ORTH: "ش", NORM: "ش"}], + "معضلاتمان": [{ORTH: "معضلات", NORM: "معضلات"}, {ORTH: "مان", NORM: "مان"}], + "معلمش": [{ORTH: "معلم", NORM: "معلم"}, {ORTH: "ش", NORM: "ش"}], + "معنایش": [{ORTH: "معنای", NORM: "معنای"}, {ORTH: "ش", NORM: "ش"}], + "مغزشان": [{ORTH: "مغز", NORM: "مغز"}, {ORTH: "شان", NORM: "شان"}], + "مفیدند": [{ORTH: "مفید", NORM: "مفید"}, {ORTH: "ند", NORM: "ند"}], + "مقابلش": [{ORTH: "مقابل", NORM: "مقابل"}, {ORTH: "ش", NORM: "ش"}], + "مقاله‌اش": [{ORTH: "مقاله‌", NORM: "مقاله‌"}, {ORTH: "اش", NORM: "اش"}], + "مقدمش": [{ORTH: "مقدم", NORM: "مقدم"}, {ORTH: "ش", NORM: "ش"}], + "مقرش": [{ORTH: "مقر", NORM: "مقر"}, {ORTH: "ش", NORM: "ش"}], + "مقصدشان": [{ORTH: "مقصد", NORM: "مقصد"}, {ORTH: "شان", NORM: "شان"}], + "مقصرند": [{ORTH: "مقصر", NORM: "مقصر"}, {ORTH: "ند", NORM: "ند"}], + "مقصودتان": [{ORTH: "مقصود", NORM: "مقصود"}, {ORTH: "تان", NORM: "تان"}], + "ملاقاتهایش": [{ORTH: "ملاقاتهای", NORM: "ملاقاتهای"}, {ORTH: "ش", NORM: "ش"}], + "ممکنشان": [{ORTH: "ممکن", NORM: "ممکن"}, {ORTH: "شان", NORM: "شان"}], + "ممیزیهاست": [{ORTH: "ممیزیها", NORM: "ممیزیها"}, {ORTH: "ست", NORM: "ست"}], + "منظورم": [{ORTH: "منظور", NORM: "منظور"}, {ORTH: "م", NORM: "م"}], + "منی": [{ORTH: "من", NORM: "من"}, {ORTH: "ی", NORM: "ی"}], + "منید": [{ORTH: "من", NORM: "من"}, {ORTH: "ید", NORM: "ید"}], + "مهربانش": [{ORTH: "مهربان", NORM: "مهربان"}, {ORTH: "ش", NORM: "ش"}], + "مهم‌اند": [{ORTH: "مهم‌", NORM: "مهم‌"}, {ORTH: "اند", NORM: "اند"}], + "مواجهند": [{ORTH: "مواجه", NORM: "مواجه"}, {ORTH: "ند", NORM: "ند"}], + "مواجه‌اند": [{ORTH: "مواجه‌", NORM: "مواجه‌"}, {ORTH: "اند", NORM: "اند"}], + "مواخذه‌ات": [{ORTH: "مواخذه", NORM: "مواخذه"}, {ORTH: "‌ات", NORM: "ات"}], + "مواضعشان": [{ORTH: "مواضع", NORM: "مواضع"}, {ORTH: "شان", NORM: "شان"}], + "مواضعمان": 
[{ORTH: "مواضع", NORM: "مواضع"}, {ORTH: "مان", NORM: "مان"}], + "موافقند": [{ORTH: "موافق", NORM: "موافق"}, {ORTH: "ند", NORM: "ند"}], + "موجوداتش": [{ORTH: "موجودات", NORM: "موجودات"}, {ORTH: "ش", NORM: "ش"}], + "موجودند": [{ORTH: "موجود", NORM: "موجود"}, {ORTH: "ند", NORM: "ند"}], + "موردش": [{ORTH: "مورد", NORM: "مورد"}, {ORTH: "ش", NORM: "ش"}], + "موضعشان": [{ORTH: "موضع", NORM: "موضع"}, {ORTH: "شان", NORM: "شان"}], + "موظفند": [{ORTH: "موظف", NORM: "موظف"}, {ORTH: "ند", NORM: "ند"}], + "موهایش": [{ORTH: "موهای", NORM: "موهای"}, {ORTH: "ش", NORM: "ش"}], + "موهایمان": [{ORTH: "موهای", NORM: "موهای"}, {ORTH: "مان", NORM: "مان"}], + "مویم": [{ORTH: "مو", NORM: "مو"}, {ORTH: "یم", NORM: "یم"}], + "ناخرسندند": [{ORTH: "ناخرسند", NORM: "ناخرسند"}, {ORTH: "ند", NORM: "ند"}], + "ناراحتیش": [{ORTH: "ناراحتی", NORM: "ناراحتی"}, {ORTH: "ش", NORM: "ش"}], + "ناراضی‌اند": [{ORTH: "ناراضی‌", NORM: "ناراضی‌"}, {ORTH: "اند", NORM: "اند"}], + "نارواست": [{ORTH: "ناروا", NORM: "ناروا"}, {ORTH: "ست", NORM: "ست"}], + "نازش": [{ORTH: "ناز", NORM: "ناز"}, {ORTH: "ش", NORM: "ش"}], + "نامش": [{ORTH: "نام", NORM: "نام"}, {ORTH: "ش", NORM: "ش"}], + "نامشان": [{ORTH: "نام", NORM: "نام"}, {ORTH: "شان", NORM: "شان"}], + "نامم": [{ORTH: "نام", NORM: "نام"}, {ORTH: "م", NORM: "م"}], + "نامه‌ات": [{ORTH: "نامه", NORM: "نامه"}, {ORTH: "‌ات", NORM: "ات"}], + "نامه‌ام": [{ORTH: "نامه‌", NORM: "نامه‌"}, {ORTH: "ام", NORM: "ام"}], + "ناچارم": [{ORTH: "ناچار", NORM: "ناچار"}, {ORTH: "م", NORM: "م"}], + "نخست‌وزیری‌اش": [ + {ORTH: "نخست‌وزیری‌", NORM: "نخست‌وزیری‌"}, + {ORTH: "اش", NORM: "اش"}, + ], + "نزدش": [{ORTH: "نزد", NORM: "نزد"}, {ORTH: "ش", NORM: "ش"}], + "نشانم": [{ORTH: "نشان", NORM: "نشان"}, {ORTH: "م", NORM: "م"}], + "نظرات‌شان": [{ORTH: "نظرات‌", NORM: "نظرات‌"}, {ORTH: "شان", NORM: "شان"}], + "نظرتان": [{ORTH: "نظر", NORM: "نظر"}, {ORTH: "تان", NORM: "تان"}], + "نظرش": [{ORTH: "نظر", NORM: "نظر"}, {ORTH: "ش", NORM: "ش"}], + "نظرشان": [{ORTH: "نظر", NORM: "نظر"}, {ORTH: "شان", NORM: 
"شان"}], + "نظرم": [{ORTH: "نظر", NORM: "نظر"}, {ORTH: "م", NORM: "م"}], + "نظرهایشان": [{ORTH: "نظرهای", NORM: "نظرهای"}, {ORTH: "شان", NORM: "شان"}], + "نفاقش": [{ORTH: "نفاق", NORM: "نفاق"}, {ORTH: "ش", NORM: "ش"}], + "نفرند": [{ORTH: "نفر", NORM: "نفر"}, {ORTH: "ند", NORM: "ند"}], + "نفوذیند": [{ORTH: "نفوذی", NORM: "نفوذی"}, {ORTH: "ند", NORM: "ند"}], + "نقطه‌نظراتتان": [ + {ORTH: "نقطه‌نظرات", NORM: "نقطه‌نظرات"}, + {ORTH: "تان", NORM: "تان"}, + ], + "نمایشی‌مان": [{ORTH: "نمایشی‌", NORM: "نمایشی‌"}, {ORTH: "مان", NORM: "مان"}], + "نمایندگی‌شان": [ + {ORTH: "نمایندگی‌", NORM: "نمایندگی‌"}, + {ORTH: "شان", NORM: "شان"}, + ], + "نمونه‌اش": [{ORTH: "نمونه‌", NORM: "نمونه‌"}, {ORTH: "اش", NORM: "اش"}], + "نمی‌پذیرندش": [{ORTH: "نمی‌پذیرند", NORM: "نمی‌پذیرند"}, {ORTH: "ش", NORM: "ش"}], + "نوآوری‌اش": [{ORTH: "نوآوری‌", NORM: "نوآوری‌"}, {ORTH: "اش", NORM: "اش"}], + "نوشته‌هایشان": [ + {ORTH: "نوشته‌های", NORM: "نوشته‌های"}, + {ORTH: "شان", NORM: "شان"}, + ], + "نوشته‌هایم": [{ORTH: "نوشته‌ها", NORM: "نوشته‌ها"}, {ORTH: "یم", NORM: "یم"}], + "نکردنشان": [{ORTH: "نکردن", NORM: "نکردن"}, {ORTH: "شان", NORM: "شان"}], + "نگاهداری‌شان": [ + {ORTH: "نگاهداری‌", NORM: "نگاهداری‌"}, + {ORTH: "شان", NORM: "شان"}, + ], + "نگاهش": [{ORTH: "نگاه", NORM: "نگاه"}, {ORTH: "ش", NORM: "ش"}], + "نگرانم": [{ORTH: "نگران", NORM: "نگران"}, {ORTH: "م", NORM: "م"}], + "نگرشهایشان": [{ORTH: "نگرشهای", NORM: "نگرشهای"}, {ORTH: "شان", NORM: "شان"}], + "نیازمندند": [{ORTH: "نیازمند", NORM: "نیازمند"}, {ORTH: "ند", NORM: "ند"}], + "هدفش": [{ORTH: "هدف", NORM: "هدف"}, {ORTH: "ش", NORM: "ش"}], + "همانست": [{ORTH: "همان", NORM: "همان"}, {ORTH: "ست", NORM: "ست"}], + "همراهش": [{ORTH: "همراه", NORM: "همراه"}, {ORTH: "ش", NORM: "ش"}], + "همسرتان": [{ORTH: "همسر", NORM: "همسر"}, {ORTH: "تان", NORM: "تان"}], + "همسرش": [{ORTH: "همسر", NORM: "همسر"}, {ORTH: "ش", NORM: "ش"}], + "همسرم": [{ORTH: "همسر", NORM: "همسر"}, {ORTH: "م", NORM: "م"}], + "همفکرانش": [{ORTH: "همفکران", NORM: "همفکران"}, {ORTH: 
"ش", NORM: "ش"}], + "همه‌اش": [{ORTH: "همه‌", NORM: "همه‌"}, {ORTH: "اش", NORM: "اش"}], + "همه‌شان": [{ORTH: "همه‌", NORM: "همه‌"}, {ORTH: "شان", NORM: "شان"}], + "همکارانش": [{ORTH: "همکاران", NORM: "همکاران"}, {ORTH: "ش", NORM: "ش"}], + "هم‌نظریم": [{ORTH: "هم‌نظر", NORM: "هم‌نظر"}, {ORTH: "یم", NORM: "یم"}], + "هنرش": [{ORTH: "هنر", NORM: "هنر"}, {ORTH: "ش", NORM: "ش"}], + "هواست": [{ORTH: "هوا", NORM: "هوا"}, {ORTH: "ست", NORM: "ست"}], + "هویتش": [{ORTH: "هویت", NORM: "هویت"}, {ORTH: "ش", NORM: "ش"}], + "وابسته‌اند": [{ORTH: "وابسته‌", NORM: "وابسته‌"}, {ORTH: "اند", NORM: "اند"}], + "واقفند": [{ORTH: "واقف", NORM: "واقف"}, {ORTH: "ند", NORM: "ند"}], + "والدینشان": [{ORTH: "والدین", NORM: "والدین"}, {ORTH: "شان", NORM: "شان"}], + "وجدان‌تان": [{ORTH: "وجدان‌", NORM: "وجدان‌"}, {ORTH: "تان", NORM: "تان"}], + "وجودشان": [{ORTH: "وجود", NORM: "وجود"}, {ORTH: "شان", NORM: "شان"}], + "وطنم": [{ORTH: "وطن", NORM: "وطن"}, {ORTH: "م", NORM: "م"}], + "وعده‌اش": [{ORTH: "وعده‌", NORM: "وعده‌"}, {ORTH: "اش", NORM: "اش"}], + "وقتمان": [{ORTH: "وقت", NORM: "وقت"}, {ORTH: "مان", NORM: "مان"}], + "ولادتش": [{ORTH: "ولادت", NORM: "ولادت"}, {ORTH: "ش", NORM: "ش"}], + "پایانش": [{ORTH: "پایان", NORM: "پایان"}, {ORTH: "ش", NORM: "ش"}], + "پایش": [{ORTH: "پای", NORM: "پای"}, {ORTH: "ش", NORM: "ش"}], + "پایین‌ترند": [{ORTH: "پایین‌تر", NORM: "پایین‌تر"}, {ORTH: "ند", NORM: "ند"}], + "پدرت": [{ORTH: "پدر", NORM: "پدر"}, {ORTH: "ت", NORM: "ت"}], + "پدرش": [{ORTH: "پدر", NORM: "پدر"}, {ORTH: "ش", NORM: "ش"}], + "پدرشان": [{ORTH: "پدر", NORM: "پدر"}, {ORTH: "شان", NORM: "شان"}], + "پدرم": [{ORTH: "پدر", NORM: "پدر"}, {ORTH: "م", NORM: "م"}], + "پربارش": [{ORTH: "پربار", NORM: "پربار"}, {ORTH: "ش", NORM: "ش"}], + "پروردگارت": [{ORTH: "پروردگار", NORM: "پروردگار"}, {ORTH: "ت", NORM: "ت"}], + "پسرتان": [{ORTH: "پسر", NORM: "پسر"}, {ORTH: "تان", NORM: "تان"}], + "پسرش": [{ORTH: "پسر", NORM: "پسر"}, {ORTH: "ش", NORM: "ش"}], + "پسرعمویش": [{ORTH: "پسرعموی", NORM: "پسرعموی"}, {ORTH: "ش", 
NORM: "ش"}], + "پسر‌عمویت": [{ORTH: "پسر‌عموی", NORM: "پسر‌عموی"}, {ORTH: "ت", NORM: "ت"}], + "پشتش": [{ORTH: "پشت", NORM: "پشت"}, {ORTH: "ش", NORM: "ش"}], + "پشیمونی": [{ORTH: "پشیمون", NORM: "پشیمون"}, {ORTH: "ی", NORM: "ی"}], + "پولش": [{ORTH: "پول", NORM: "پول"}, {ORTH: "ش", NORM: "ش"}], + "پژوهش‌هایش": [{ORTH: "پژوهش‌های", NORM: "پژوهش‌های"}, {ORTH: "ش", NORM: "ش"}], + "پیامبرش": [{ORTH: "پیامبر", NORM: "پیامبر"}, {ORTH: "ش", NORM: "ش"}], + "پیامبری": [{ORTH: "پیامبر", NORM: "پیامبر"}, {ORTH: "ی", NORM: "ی"}], + "پیامش": [{ORTH: "پیام", NORM: "پیام"}, {ORTH: "ش", NORM: "ش"}], + "پیداست": [{ORTH: "پیدا", NORM: "پیدا"}, {ORTH: "ست", NORM: "ست"}], + "پیراهنش": [{ORTH: "پیراهن", NORM: "پیراهن"}, {ORTH: "ش", NORM: "ش"}], + "پیروانش": [{ORTH: "پیروان", NORM: "پیروان"}, {ORTH: "ش", NORM: "ش"}], + "پیشانی‌اش": [{ORTH: "پیشانی‌", NORM: "پیشانی‌"}, {ORTH: "اش", NORM: "اش"}], + "پیمانت": [{ORTH: "پیمان", NORM: "پیمان"}, {ORTH: "ت", NORM: "ت"}], + "پیوندشان": [{ORTH: "پیوند", NORM: "پیوند"}, {ORTH: "شان", NORM: "شان"}], + "چاپش": [{ORTH: "چاپ", NORM: "چاپ"}, {ORTH: "ش", NORM: "ش"}], + "چت": [{ORTH: "چ", NORM: "چ"}, {ORTH: "ت", NORM: "ت"}], + "چته": [{ORTH: "چ", NORM: "چ"}, {ORTH: "ت", NORM: "ت"}, {ORTH: "ه", NORM: "ه"}], + "چرخ‌هایش": [{ORTH: "چرخ‌های", NORM: "چرخ‌های"}, {ORTH: "ش", NORM: "ش"}], + "چشمم": [{ORTH: "چشم", NORM: "چشم"}, {ORTH: "م", NORM: "م"}], + "چشمهایش": [{ORTH: "چشمهای", NORM: "چشمهای"}, {ORTH: "ش", NORM: "ش"}], + "چشمهایشان": [{ORTH: "چشمهای", NORM: "چشمهای"}, {ORTH: "شان", NORM: "شان"}], + "چمنم": [{ORTH: "چمن", NORM: "چمن"}, {ORTH: "م", NORM: "م"}], + "چهره‌اش": [{ORTH: "چهره‌", NORM: "چهره‌"}, {ORTH: "اش", NORM: "اش"}], + "چکاره‌اند": [{ORTH: "چکاره‌", NORM: "چکاره‌"}, {ORTH: "اند", NORM: "اند"}], + "چیزهاست": [{ORTH: "چیزها", NORM: "چیزها"}, {ORTH: "ست", NORM: "ست"}], + "چیزهایش": [{ORTH: "چیزهای", NORM: "چیزهای"}, {ORTH: "ش", NORM: "ش"}], + "چیزیست": [{ORTH: "چیزی", NORM: "چیزی"}, {ORTH: "ست", NORM: "ست"}], + "چیست": [{ORTH: "چی", NORM: "چی"}, 
{ORTH: "ست", NORM: "ست"}], + "کارش": [{ORTH: "کار", NORM: "کار"}, {ORTH: "ش", NORM: "ش"}], + "کارشان": [{ORTH: "کار", NORM: "کار"}, {ORTH: "شان", NORM: "شان"}], + "کارم": [{ORTH: "کار", NORM: "کار"}, {ORTH: "م", NORM: "م"}], + "کارند": [{ORTH: "کار", NORM: "کار"}, {ORTH: "ند", NORM: "ند"}], + "کارهایم": [{ORTH: "کارها", NORM: "کارها"}, {ORTH: "یم", NORM: "یم"}], + "کافیست": [{ORTH: "کافی", NORM: "کافی"}, {ORTH: "ست", NORM: "ست"}], + "کتابخانه‌اش": [{ORTH: "کتابخانه‌", NORM: "کتابخانه‌"}, {ORTH: "اش", NORM: "اش"}], + "کتابش": [{ORTH: "کتاب", NORM: "کتاب"}, {ORTH: "ش", NORM: "ش"}], + "کتابهاشان": [{ORTH: "کتابها", NORM: "کتابها"}, {ORTH: "شان", NORM: "شان"}], + "کجاست": [{ORTH: "کجا", NORM: "کجا"}, {ORTH: "ست", NORM: "ست"}], + "کدورتهایشان": [{ORTH: "کدورتهای", NORM: "کدورتهای"}, {ORTH: "شان", NORM: "شان"}], + "کردنش": [{ORTH: "کردن", NORM: "کردن"}, {ORTH: "ش", NORM: "ش"}], + "کرم‌خورده‌اش": [ + {ORTH: "کرم‌خورده‌", NORM: "کرم‌خورده‌"}, + {ORTH: "اش", NORM: "اش"}, + ], + "کشش": [{ORTH: "کش", NORM: "کش"}, {ORTH: "ش", NORM: "ش"}], + "کشورش": [{ORTH: "کشور", NORM: "کشور"}, {ORTH: "ش", NORM: "ش"}], + "کشورشان": [{ORTH: "کشور", NORM: "کشور"}, {ORTH: "شان", NORM: "شان"}], + "کشورمان": [{ORTH: "کشور", NORM: "کشور"}, {ORTH: "مان", NORM: "مان"}], + "کشورهاست": [{ORTH: "کشورها", NORM: "کشورها"}, {ORTH: "ست", NORM: "ست"}], + "کلیشه‌هاست": [{ORTH: "کلیشه‌ها", NORM: "کلیشه‌ها"}, {ORTH: "ست", NORM: "ست"}], + "کمبودهاست": [{ORTH: "کمبودها", NORM: "کمبودها"}, {ORTH: "ست", NORM: "ست"}], + "کمتره": [{ORTH: "کمتر", NORM: "کمتر"}, {ORTH: "ه", NORM: "ه"}], + "کمکم": [{ORTH: "کمک", NORM: "کمک"}, {ORTH: "م", NORM: "م"}], + "کنارش": [{ORTH: "کنار", NORM: "کنار"}, {ORTH: "ش", NORM: "ش"}], + "کودکانشان": [{ORTH: "کودکان", NORM: "کودکان"}, {ORTH: "شان", NORM: "شان"}], + "کوچکش": [{ORTH: "کوچک", NORM: "کوچک"}, {ORTH: "ش", NORM: "ش"}], + "کیست": [{ORTH: "کی", NORM: "کی"}, {ORTH: "ست", NORM: "ست"}], + "کیفش": [{ORTH: "کیف", NORM: "کیف"}, {ORTH: "ش", NORM: "ش"}], + "گذشته‌اند": [{ORTH: "گذشته‌", 
NORM: "گذشته‌"}, {ORTH: "اند", NORM: "اند"}], + "گرانقدرش": [{ORTH: "گرانقدر", NORM: "گرانقدر"}, {ORTH: "ش", NORM: "ش"}], + "گرانقدرشان": [{ORTH: "گرانقدر", NORM: "گرانقدر"}, {ORTH: "شان", NORM: "شان"}], + "گردنتان": [{ORTH: "گردن", NORM: "گردن"}, {ORTH: "تان", NORM: "تان"}], + "گردنش": [{ORTH: "گردن", NORM: "گردن"}, {ORTH: "ش", NORM: "ش"}], + "گرفتارند": [{ORTH: "گرفتار", NORM: "گرفتار"}, {ORTH: "ند", NORM: "ند"}], + "گرفتنت": [{ORTH: "گرفتن", NORM: "گرفتن"}, {ORTH: "ت", NORM: "ت"}], + "گروهند": [{ORTH: "گروه", NORM: "گروه"}, {ORTH: "ند", NORM: "ند"}], + "گروگانهایش": [{ORTH: "گروگانهای", NORM: "گروگانهای"}, {ORTH: "ش", NORM: "ش"}], + "گریمش": [{ORTH: "گریم", NORM: "گریم"}, {ORTH: "ش", NORM: "ش"}], + "گفتارمان": [{ORTH: "گفتار", NORM: "گفتار"}, {ORTH: "مان", NORM: "مان"}], + "گلهایش": [{ORTH: "گلهای", NORM: "گلهای"}, {ORTH: "ش", NORM: "ش"}], + "گلویش": [{ORTH: "گلوی", NORM: "گلوی"}, {ORTH: "ش", NORM: "ش"}], + "گناهت": [{ORTH: "گناه", NORM: "گناه"}, {ORTH: "ت", NORM: "ت"}], + "گوشش": [{ORTH: "گوش", NORM: "گوش"}, {ORTH: "ش", NORM: "ش"}], + "گوشم": [{ORTH: "گوش", NORM: "گوش"}, {ORTH: "م", NORM: "م"}], + "گولش": [{ORTH: "گول", NORM: "گول"}, {ORTH: "ش", NORM: "ش"}], + "یادتان": [{ORTH: "یاد", NORM: "یاد"}, {ORTH: "تان", NORM: "تان"}], + "یادم": [{ORTH: "یاد", NORM: "یاد"}, {ORTH: "م", NORM: "م"}], + "یادمان": [{ORTH: "یاد", NORM: "یاد"}, {ORTH: "مان", NORM: "مان"}], + "یارانش": [{ORTH: "یاران", NORM: "یاران"}, {ORTH: "ش", NORM: "ش"}], } - -_exc.update( - { - "آبرویت": [ - {ORTH: "آبروی", LEMMA: "آبروی", NORM: "آبروی", TAG: "NOUN"}, - {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"}, - ], - "آب‌نباتش": [ - {ORTH: "آب‌نبات", LEMMA: "آب‌نبات", NORM: "آب‌نبات", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "آثارش": [ - {ORTH: "آثار", LEMMA: "آثار", NORM: "آثار", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "آخرش": [ - {ORTH: "آخر", LEMMA: "آخر", NORM: "آخر", TAG: "ADV"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, 
- ], - "آدمهاست": [ - {ORTH: "آدمها", LEMMA: "آدمها", NORM: "آدمها", TAG: "NOUN"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "آرزومندیم": [ - {ORTH: "آرزومند", LEMMA: "آرزومند", NORM: "آرزومند", TAG: "ADJ"}, - {ORTH: "یم", LEMMA: "یم", NORM: "یم", TAG: "VERB"}, - ], - "آزادند": [ - {ORTH: "آزاد", LEMMA: "آزاد", NORM: "آزاد", TAG: "ADJ"}, - {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"}, - ], - "آسیب‌پذیرند": [ - {ORTH: "آسیب‌پذیر", LEMMA: "آسیب‌پذیر", NORM: "آسیب‌پذیر", TAG: "ADJ"}, - {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"}, - ], - "آفریده‌اند": [ - {ORTH: "آفریده‌", LEMMA: "آفریده‌", NORM: "آفریده‌", TAG: "NOUN"}, - {ORTH: "اند", LEMMA: "اند", NORM: "اند", TAG: "VERB"}, - ], - "آمدنش": [ - {ORTH: "آمدن", LEMMA: "آمدن", NORM: "آمدن", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "آمریکاست": [ - {ORTH: "آمریکا", LEMMA: "آمریکا", NORM: "آمریکا", TAG: "NOUN"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "آنجاست": [ - {ORTH: "آنجا", LEMMA: "آنجا", NORM: "آنجا", TAG: "ADV"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "آنست": [ - {ORTH: "آن", LEMMA: "آن", NORM: "آن", TAG: "NOUN"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "آنند": [ - {ORTH: "آن", LEMMA: "آن", NORM: "آن", TAG: "NOUN"}, - {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"}, - ], - "آن‌هاست": [ - {ORTH: "آن‌ها", LEMMA: "آن‌ها", NORM: "آن‌ها", TAG: "NOUN"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "آپاداناست": [ - {ORTH: "آپادانا", LEMMA: "آپادانا", NORM: "آپادانا", TAG: "NOUN"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "اجتماعی‌مان": [ - {ORTH: "اجتماعی‌", LEMMA: "اجتماعی‌", NORM: "اجتماعی‌", TAG: "ADJ"}, - {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"}, - ], - "اجدادت": [ - {ORTH: "اجداد", LEMMA: "اجداد", NORM: "اجداد", TAG: "NOUN"}, - {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"}, - ], - "اجدادش": [ - {ORTH: "اجداد", LEMMA: "اجداد", NORM: 
"اجداد", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "اجدادی‌شان": [ - {ORTH: "اجدادی‌", LEMMA: "اجدادی‌", NORM: "اجدادی‌", TAG: "ADJ"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "اجراست": [ - {ORTH: "اجرا", LEMMA: "اجرا", NORM: "اجرا", TAG: "NOUN"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "اختیارش": [ - {ORTH: "اختیار", LEMMA: "اختیار", NORM: "اختیار", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "اخلاقشان": [ - {ORTH: "اخلاق", LEMMA: "اخلاق", NORM: "اخلاق", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "ادعایمان": [ - {ORTH: "ادعای", LEMMA: "ادعای", NORM: "ادعای", TAG: "NOUN"}, - {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"}, - ], - "اذیتش": [ - {ORTH: "اذیت", LEMMA: "اذیت", NORM: "اذیت", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "اراده‌اش": [ - {ORTH: "اراده‌", LEMMA: "اراده‌", NORM: "اراده‌", TAG: "NOUN"}, - {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"}, - ], - "ارتباطش": [ - {ORTH: "ارتباط", LEMMA: "ارتباط", NORM: "ارتباط", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "ارتباطمان": [ - {ORTH: "ارتباط", LEMMA: "ارتباط", NORM: "ارتباط", TAG: "NOUN"}, - {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"}, - ], - "ارزشهاست": [ - {ORTH: "ارزشها", LEMMA: "ارزشها", NORM: "ارزشها", TAG: "NOUN"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "ارزی‌اش": [ - {ORTH: "ارزی‌", LEMMA: "ارزی‌", NORM: "ارزی‌", TAG: "ADJ"}, - {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"}, - ], - "اره‌اش": [ - {ORTH: "اره‌", LEMMA: "اره‌", NORM: "اره‌", TAG: "NOUN"}, - {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"}, - ], - "ازش": [ - {ORTH: "از", LEMMA: "از", NORM: "از", TAG: "ADP"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "ازین": [ - {ORTH: "از", LEMMA: "از", NORM: "از", TAG: "ADP"}, - {ORTH: "ین", LEMMA: "ین", NORM: "ین", TAG: "NOUN"}, - ], - "ازین‌هاست": 
[ - {ORTH: "از", LEMMA: "از", NORM: "از", TAG: "ADP"}, - {ORTH: "ین‌ها", LEMMA: "ین‌ها", NORM: "ین‌ها", TAG: "NOUN"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "استخوانند": [ - {ORTH: "استخوان", LEMMA: "استخوان", NORM: "استخوان", TAG: "NOUN"}, - {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"}, - ], - "اسلامند": [ - {ORTH: "اسلام", LEMMA: "اسلام", NORM: "اسلام", TAG: "NOUN"}, - {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"}, - ], - "اسلامی‌اند": [ - {ORTH: "اسلامی‌", LEMMA: "اسلامی‌", NORM: "اسلامی‌", TAG: "ADJ"}, - {ORTH: "اند", LEMMA: "اند", NORM: "اند", TAG: "VERB"}, - ], - "اسلحه‌هایشان": [ - {ORTH: "اسلحه‌های", LEMMA: "اسلحه‌های", NORM: "اسلحه‌های", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "اسمت": [ - {ORTH: "اسم", LEMMA: "اسم", NORM: "اسم", TAG: "NOUN"}, - {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"}, - ], - "اسمش": [ - {ORTH: "اسم", LEMMA: "اسم", NORM: "اسم", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "اشتباهند": [ - {ORTH: "اشتباه", LEMMA: "اشتباه", NORM: "اشتباه", TAG: "NOUN"}, - {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"}, - ], - "اصلش": [ - {ORTH: "اصل", LEMMA: "اصل", NORM: "اصل", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "اطاقش": [ - {ORTH: "اطاق", LEMMA: "اطاق", NORM: "اطاق", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "اعتقادند": [ - {ORTH: "اعتقاد", LEMMA: "اعتقاد", NORM: "اعتقاد", TAG: "NOUN"}, - {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"}, - ], - "اعلایش": [ - {ORTH: "اعلای", LEMMA: "اعلای", NORM: "اعلای", TAG: "ADJ"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "افتراست": [ - {ORTH: "افترا", LEMMA: "افترا", NORM: "افترا", TAG: "NOUN"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "افطارت": [ - {ORTH: "افطار", LEMMA: "افطار", NORM: "افطار", TAG: "NOUN"}, - {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"}, - ], - "اقوامش": [ - {ORTH: "اقوام", LEMMA: "اقوام", 
NORM: "اقوام", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "امروزیش": [ - {ORTH: "امروزی", LEMMA: "امروزی", NORM: "امروزی", TAG: "ADJ"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "اموالش": [ - {ORTH: "اموال", LEMMA: "اموال", NORM: "اموال", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "امیدوارند": [ - {ORTH: "امیدوار", LEMMA: "امیدوار", NORM: "امیدوار", TAG: "ADJ"}, - {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"}, - ], - "امیدواریم": [ - {ORTH: "امیدوار", LEMMA: "امیدوار", NORM: "امیدوار", TAG: "ADJ"}, - {ORTH: "یم", LEMMA: "یم", NORM: "یم", TAG: "VERB"}, - ], - "انتخابهایم": [ - {ORTH: "انتخابها", LEMMA: "انتخابها", NORM: "انتخابها", TAG: "NOUN"}, - {ORTH: "یم", LEMMA: "یم", NORM: "یم", TAG: "NOUN"}, - ], - "انتظارم": [ - {ORTH: "انتظار", LEMMA: "انتظار", NORM: "انتظار", TAG: "NOUN"}, - {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"}, - ], - "انجمنم": [ - {ORTH: "انجمن", LEMMA: "انجمن", NORM: "انجمن", TAG: "NOUN"}, - {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"}, - ], - "اندرش": [ - {ORTH: "اندر", LEMMA: "اندر", NORM: "اندر", TAG: "ADP"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "انشایش": [ - {ORTH: "انشای", LEMMA: "انشای", NORM: "انشای", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "انگشتشان": [ - {ORTH: "انگشت", LEMMA: "انگشت", NORM: "انگشت", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "انگشتهایش": [ - {ORTH: "انگشتهای", LEMMA: "انگشتهای", NORM: "انگشتهای", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "اهمیتشان": [ - {ORTH: "اهمیت", LEMMA: "اهمیت", NORM: "اهمیت", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "اهمیتند": [ - {ORTH: "اهمیت", LEMMA: "اهمیت", NORM: "اهمیت", TAG: "NOUN"}, - {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"}, - ], - "اوایلش": [ - {ORTH: "اوایل", LEMMA: "اوایل", NORM: "اوایل", TAG: "ADV"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", 
TAG: "NOUN"}, - ], - "اوست": [ - {ORTH: "او", LEMMA: "او", NORM: "او", TAG: "NOUN"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "اولش": [ - {ORTH: "اول", LEMMA: "اول", NORM: "اول", TAG: "ADV"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "اولشان": [ - {ORTH: "اول", LEMMA: "اول", NORM: "اول", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "اولم": [ - {ORTH: "اول", LEMMA: "اول", NORM: "اول", TAG: "ADJ"}, - {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"}, - ], - "اکثرشان": [ - {ORTH: "اکثر", LEMMA: "اکثر", NORM: "اکثر", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "ایتالیاست": [ - {ORTH: "ایتالیا", LEMMA: "ایتالیا", NORM: "ایتالیا", TAG: "NOUN"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "ایرانی‌اش": [ - {ORTH: "ایرانی‌", LEMMA: "ایرانی‌", NORM: "ایرانی‌", TAG: "ADJ"}, - {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"}, - ], - "اینجاست": [ - {ORTH: "اینجا", LEMMA: "اینجا", NORM: "اینجا", TAG: "ADV"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "این‌هاست": [ - {ORTH: "این‌ها", LEMMA: "این‌ها", NORM: "این‌ها", TAG: "NOUN"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "بابات": [ - {ORTH: "بابا", LEMMA: "بابا", NORM: "بابا", TAG: "NOUN"}, - {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"}, - ], - "بارش": [ - {ORTH: "بار", LEMMA: "بار", NORM: "بار", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "بازیگرانش": [ - {ORTH: "بازیگران", LEMMA: "بازیگران", NORM: "بازیگران", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "بازیگرمان": [ - {ORTH: "بازیگر", LEMMA: "بازیگر", NORM: "بازیگر", TAG: "NOUN"}, - {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"}, - ], - "بازیگرهایم": [ - {ORTH: "بازیگرها", LEMMA: "بازیگرها", NORM: "بازیگرها", TAG: "NOUN"}, - {ORTH: "یم", LEMMA: "یم", NORM: "یم", TAG: "NOUN"}, - ], - "بازی‌اش": [ - {ORTH: "بازی‌", LEMMA: "بازی‌", NORM: "بازی‌", TAG: "NOUN"}, 
- {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"}, - ], - "بالاست": [ - {ORTH: "بالا", LEMMA: "بالا", NORM: "بالا", TAG: "ADV"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "باورند": [ - {ORTH: "باور", LEMMA: "باور", NORM: "باور", TAG: "NOUN"}, - {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"}, - ], - "بجاست": [ - {ORTH: "بجا", LEMMA: "بجا", NORM: "بجا", TAG: "ADJ"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "بدان": [ - {ORTH: "ب", LEMMA: "ب", NORM: "ب", TAG: "ADP"}, - {ORTH: "دان", LEMMA: "دان", NORM: "دان", TAG: "NOUN"}, - ], - "بدش": [ - {ORTH: "بد", LEMMA: "بد", NORM: "بد", TAG: "ADJ"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "بدشان": [ - {ORTH: "بد", LEMMA: "بد", NORM: "بد", TAG: "ADJ"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "بدنم": [ - {ORTH: "بدن", LEMMA: "بدن", NORM: "بدن", TAG: "NOUN"}, - {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"}, - ], - "بدهی‌ات": [ - {ORTH: "بدهی‌", LEMMA: "بدهی‌", NORM: "بدهی‌", TAG: "NOUN"}, - {ORTH: "ات", LEMMA: "ات", NORM: "ات", TAG: "NOUN"}, - ], - "بدین": [ - {ORTH: "ب", LEMMA: "ب", NORM: "ب", TAG: "ADP"}, - {ORTH: "دین", LEMMA: "دین", NORM: "دین", TAG: "NOUN"}, - ], - "برابرش": [ - {ORTH: "برابر", LEMMA: "برابر", NORM: "برابر", TAG: "ADP"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "برادرت": [ - {ORTH: "برادر", LEMMA: "برادر", NORM: "برادر", TAG: "NOUN"}, - {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"}, - ], - "برادرش": [ - {ORTH: "برادر", LEMMA: "برادر", NORM: "برادر", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "برایت": [ - {ORTH: "برای", LEMMA: "برای", NORM: "برای", TAG: "ADP"}, - {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"}, - ], - "برایتان": [ - {ORTH: "برای", LEMMA: "برای", NORM: "برای", TAG: "ADP"}, - {ORTH: "تان", LEMMA: "تان", NORM: "تان", TAG: "NOUN"}, - ], - "برایش": [ - {ORTH: "برای", LEMMA: "برای", NORM: "برای", TAG: "ADP"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], 
- "برایشان": [ - {ORTH: "برای", LEMMA: "برای", NORM: "برای", TAG: "ADP"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "برایم": [ - {ORTH: "برای", LEMMA: "برای", NORM: "برای", TAG: "ADP"}, - {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"}, - ], - "برایمان": [ - {ORTH: "برای", LEMMA: "برای", NORM: "برای", TAG: "ADP"}, - {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"}, - ], - "برخوردارند": [ - {ORTH: "برخوردار", LEMMA: "برخوردار", NORM: "برخوردار", TAG: "ADJ"}, - {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"}, - ], - "برنامه‌سازهاست": [ - { - ORTH: "برنامه‌سازها", - LEMMA: "برنامه‌سازها", - NORM: "برنامه‌سازها", - TAG: "NOUN", - }, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "برهمش": [ - {ORTH: "برهم", LEMMA: "برهم", NORM: "برهم", TAG: "ADJ"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "برهنه‌اش": [ - {ORTH: "برهنه‌", LEMMA: "برهنه‌", NORM: "برهنه‌", TAG: "ADJ"}, - {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"}, - ], - "برگهایش": [ - {ORTH: "برگها", LEMMA: "برگها", NORM: "برگها", TAG: "NOUN"}, - {ORTH: "یش", LEMMA: "یش", NORM: "یش", TAG: "NOUN"}, - ], - "برین": [ - {ORTH: "بر", LEMMA: "بر", NORM: "بر", TAG: "ADP"}, - {ORTH: "ین", LEMMA: "ین", NORM: "ین", TAG: "NOUN"}, - ], - "بزرگش": [ - {ORTH: "بزرگ", LEMMA: "بزرگ", NORM: "بزرگ", TAG: "ADJ"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "بزرگ‌تری": [ - {ORTH: "بزرگ‌تر", LEMMA: "بزرگ‌تر", NORM: "بزرگ‌تر", TAG: "ADJ"}, - {ORTH: "ی", LEMMA: "ی", NORM: "ی", TAG: "VERB"}, - ], - "بساطش": [ - {ORTH: "بساط", LEMMA: "بساط", NORM: "بساط", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "بعدش": [ - {ORTH: "بعد", LEMMA: "بعد", NORM: "بعد", TAG: "ADJ"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "بعضیهایشان": [ - {ORTH: "بعضیهای", LEMMA: "بعضیهای", NORM: "بعضیهای", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "بعضی‌شان": [ - {ORTH: "بعضی", LEMMA: "بعضی", NORM: "بعضی", TAG: 
"NOUN"}, - {ORTH: "‌شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "بقیه‌اش": [ - {ORTH: "بقیه‌", LEMMA: "بقیه‌", NORM: "بقیه‌", TAG: "NOUN"}, - {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"}, - ], - "بلندش": [ - {ORTH: "بلند", LEMMA: "بلند", NORM: "بلند", TAG: "ADJ"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "بناگوشش": [ - {ORTH: "بناگوش", LEMMA: "بناگوش", NORM: "بناگوش", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "بنظرم": [ - {ORTH: "ب", LEMMA: "ب", NORM: "ب", TAG: "ADP"}, - {ORTH: "نظر", LEMMA: "نظر", NORM: "نظر", TAG: "NOUN"}, - {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"}, - ], - "بهت": [ - {ORTH: "به", LEMMA: "به", NORM: "به", TAG: "ADP"}, - {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"}, - ], - "بهترش": [ - {ORTH: "بهتر", LEMMA: "بهتر", NORM: "بهتر", TAG: "ADJ"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "بهترم": [ - {ORTH: "بهتر", LEMMA: "بهتر", NORM: "بهتر", TAG: "ADJ"}, - {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "VERB"}, - ], - "بهتری": [ - {ORTH: "بهتر", LEMMA: "بهتر", NORM: "بهتر", TAG: "ADJ"}, - {ORTH: "ی", LEMMA: "ی", NORM: "ی", TAG: "VERB"}, - ], - "بهش": [ - {ORTH: "به", LEMMA: "به", NORM: "به", TAG: "ADP"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "به‌شان": [ - {ORTH: "به‌", LEMMA: "به‌", NORM: "به‌", TAG: "ADP"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "بودمش": [ - {ORTH: "بودم", LEMMA: "بودم", NORM: "بودم", TAG: "VERB"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "بودنش": [ - {ORTH: "بودن", LEMMA: "بودن", NORM: "بودن", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "بودن‌شان": [ - {ORTH: "بودن‌", LEMMA: "بودن‌", NORM: "بودن‌", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "بوستانش": [ - {ORTH: "بوستان", LEMMA: "بوستان", NORM: "بوستان", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "بویش": [ - {ORTH: "بو", LEMMA: "بو", NORM: "بو", TAG: 
"NOUN"}, - {ORTH: "یش", LEMMA: "یش", NORM: "یش", TAG: "NOUN"}, - ], - "بچه‌اش": [ - {ORTH: "بچه‌", LEMMA: "بچه‌", NORM: "بچه‌", TAG: "NOUN"}, - {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"}, - ], - "بچه‌م": [ - {ORTH: "بچه‌", LEMMA: "بچه‌", NORM: "بچه‌", TAG: "NOUN"}, - {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"}, - ], - "بچه‌هایش": [ - {ORTH: "بچه‌های", LEMMA: "بچه‌های", NORM: "بچه‌های", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "بیانیه‌شان": [ - {ORTH: "بیانیه‌", LEMMA: "بیانیه‌", NORM: "بیانیه‌", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "بیدارم": [ - {ORTH: "بیدار", LEMMA: "بیدار", NORM: "بیدار", TAG: "ADJ"}, - {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"}, - ], - "بیناتری": [ - {ORTH: "بیناتر", LEMMA: "بیناتر", NORM: "بیناتر", TAG: "ADJ"}, - {ORTH: "ی", LEMMA: "ی", NORM: "ی", TAG: "VERB"}, - ], - "بی‌اطلاعند": [ - {ORTH: "بی‌اطلاع", LEMMA: "بی‌اطلاع", NORM: "بی‌اطلاع", TAG: "ADJ"}, - {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"}, - ], - "بی‌اطلاعید": [ - {ORTH: "بی‌اطلاع", LEMMA: "بی‌اطلاع", NORM: "بی‌اطلاع", TAG: "ADJ"}, - {ORTH: "ید", LEMMA: "ید", NORM: "ید", TAG: "VERB"}, - ], - "بی‌بهره‌اند": [ - {ORTH: "بی‌بهره‌", LEMMA: "بی‌بهره‌", NORM: "بی‌بهره‌", TAG: "ADJ"}, - {ORTH: "اند", LEMMA: "اند", NORM: "اند", TAG: "VERB"}, - ], - "بی‌تفاوتند": [ - {ORTH: "بی‌تفاوت", LEMMA: "بی‌تفاوت", NORM: "بی‌تفاوت", TAG: "ADJ"}, - {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"}, - ], - "بی‌حسابش": [ - {ORTH: "بی‌حساب", LEMMA: "بی‌حساب", NORM: "بی‌حساب", TAG: "ADJ"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "بی‌نیش": [ - {ORTH: "بی‌نی", LEMMA: "بی‌نی", NORM: "بی‌نی", TAG: "ADJ"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "تجربه‌هایم": [ - {ORTH: "تجربه‌ها", LEMMA: "تجربه‌ها", NORM: "تجربه‌ها", TAG: "NOUN"}, - {ORTH: "یم", LEMMA: "یم", NORM: "یم", TAG: "NOUN"}, - ], - "تحریم‌هاست": [ - {ORTH: "تحریم‌ها", LEMMA: "تحریم‌ها", NORM: "تحریم‌ها", TAG: "NOUN"}, - 
{ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "تحولند": [ - {ORTH: "تحول", LEMMA: "تحول", NORM: "تحول", TAG: "NOUN"}, - {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"}, - ], - "تخیلی‌اش": [ - {ORTH: "تخیلی‌", LEMMA: "تخیلی‌", NORM: "تخیلی‌", TAG: "ADJ"}, - {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"}, - ], - "ترا": [ - {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"}, - {ORTH: "را", LEMMA: "را", NORM: "را", TAG: "PART"}, - ], - "ترسشان": [ - {ORTH: "ترس", LEMMA: "ترس", NORM: "ترس", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "ترکش": [ - {ORTH: "ترک", LEMMA: "ترک", NORM: "ترک", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "تشنه‌ت": [ - {ORTH: "تشنه‌", LEMMA: "تشنه‌", NORM: "تشنه‌", TAG: "NOUN"}, - {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"}, - ], - "تشکیلاتی‌اش": [ - {ORTH: "تشکیلاتی‌", LEMMA: "تشکیلاتی‌", NORM: "تشکیلاتی‌", TAG: "ADJ"}, - {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"}, - ], - "تعلقش": [ - {ORTH: "تعلق", LEMMA: "تعلق", NORM: "تعلق", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "تلاششان": [ - {ORTH: "تلاش", LEMMA: "تلاش", NORM: "تلاش", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "تلاشمان": [ - {ORTH: "تلاش", LEMMA: "تلاش", NORM: "تلاش", TAG: "NOUN"}, - {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"}, - ], - "تماشاگرش": [ - {ORTH: "تماشاگر", LEMMA: "تماشاگر", NORM: "تماشاگر", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "تمامشان": [ - {ORTH: "تمام", LEMMA: "تمام", NORM: "تمام", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "تنش": [ - {ORTH: "تن", LEMMA: "تن", NORM: "تن", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "تنمان": [ - {ORTH: "تن", LEMMA: "تن", NORM: "تن", TAG: "NOUN"}, - {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"}, - ], - "تنهایی‌اش": [ - {ORTH: "تنهایی‌", LEMMA: "تنهایی‌", NORM: "تنهایی‌", 
TAG: "NOUN"}, - {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"}, - ], - "توانایی‌اش": [ - {ORTH: "توانایی‌", LEMMA: "توانایی‌", NORM: "توانایی‌", TAG: "NOUN"}, - {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"}, - ], - "توجهش": [ - {ORTH: "توجه", LEMMA: "توجه", NORM: "توجه", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "توست": [ - {ORTH: "تو", LEMMA: "تو", NORM: "تو", TAG: "NOUN"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "توصیه‌اش": [ - {ORTH: "توصیه‌", LEMMA: "توصیه‌", NORM: "توصیه‌", TAG: "NOUN"}, - {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"}, - ], - "تیغه‌اش": [ - {ORTH: "تیغه‌", LEMMA: "تیغه‌", NORM: "تیغه‌", TAG: "NOUN"}, - {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"}, - ], - "جاست": [ - {ORTH: "جا", LEMMA: "جا", NORM: "جا", TAG: "NOUN"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "جامعه‌اند": [ - {ORTH: "جامعه‌", LEMMA: "جامعه‌", NORM: "جامعه‌", TAG: "NOUN"}, - {ORTH: "اند", LEMMA: "اند", NORM: "اند", TAG: "VERB"}, - ], - "جانم": [ - {ORTH: "جان", LEMMA: "جان", NORM: "جان", TAG: "NOUN"}, - {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"}, - ], - "جایش": [ - {ORTH: "جای", LEMMA: "جای", NORM: "جای", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "جایشان": [ - {ORTH: "جای", LEMMA: "جای", NORM: "جای", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "جدیدش": [ - {ORTH: "جدید", LEMMA: "جدید", NORM: "جدید", TAG: "ADJ"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "جرمزاست": [ - {ORTH: "جرمزا", LEMMA: "جرمزا", NORM: "جرمزا", TAG: "ADJ"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "جلوست": [ - {ORTH: "جلو", LEMMA: "جلو", NORM: "جلو", TAG: "NOUN"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "جلویش": [ - {ORTH: "جلوی", LEMMA: "جلوی", NORM: "جلوی", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "جمهوریست": [ - {ORTH: "جمهوری", LEMMA: "جمهوری", NORM: "جمهوری", 
TAG: "NOUN"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "جنسش": [ - {ORTH: "جنس", LEMMA: "جنس", NORM: "جنس", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "جنس‌اند": [ - {ORTH: "جنس‌", LEMMA: "جنس‌", NORM: "جنس‌", TAG: "NOUN"}, - {ORTH: "اند", LEMMA: "اند", NORM: "اند", TAG: "VERB"}, - ], - "جوانانش": [ - {ORTH: "جوانان", LEMMA: "جوانان", NORM: "جوانان", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "جویش": [ - {ORTH: "جوی", LEMMA: "جوی", NORM: "جوی", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "جگرش": [ - {ORTH: "جگر", LEMMA: "جگر", NORM: "جگر", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "حاضرم": [ - {ORTH: "حاضر", LEMMA: "حاضر", NORM: "حاضر", TAG: "ADJ"}, - {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "VERB"}, - ], - "حالتهایشان": [ - {ORTH: "حالتهای", LEMMA: "حالتهای", NORM: "حالتهای", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "حالیست": [ - {ORTH: "حالی", LEMMA: "حالی", NORM: "حالی", TAG: "NOUN"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "حالی‌مان": [ - {ORTH: "حالی‌", LEMMA: "حالی‌", NORM: "حالی‌", TAG: "NOUN"}, - {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"}, - ], - "حاکیست": [ - {ORTH: "حاکی", LEMMA: "حاکی", NORM: "حاکی", TAG: "ADJ"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "حرامزادگی‌اش": [ - {ORTH: "حرامزادگی‌", LEMMA: "حرامزادگی‌", NORM: "حرامزادگی‌", TAG: "NOUN"}, - {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"}, - ], - "حرفتان": [ - {ORTH: "حرف", LEMMA: "حرف", NORM: "حرف", TAG: "NOUN"}, - {ORTH: "تان", LEMMA: "تان", NORM: "تان", TAG: "NOUN"}, - ], - "حرفش": [ - {ORTH: "حرف", LEMMA: "حرف", NORM: "حرف", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "حرفشان": [ - {ORTH: "حرف", LEMMA: "حرف", NORM: "حرف", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "حرفم": [ - {ORTH: "حرف", LEMMA: "حرف", 
NORM: "حرف", TAG: "NOUN"}, - {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"}, - ], - "حرف‌های‌شان": [ - {ORTH: "حرف‌های‌", LEMMA: "حرف‌های‌", NORM: "حرف‌های‌", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "حرکتمان": [ - {ORTH: "حرکت", LEMMA: "حرکت", NORM: "حرکت", TAG: "NOUN"}, - {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"}, - ], - "حریفانشان": [ - {ORTH: "حریفان", LEMMA: "حریفان", NORM: "حریفان", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "حضورشان": [ - {ORTH: "حضور", LEMMA: "حضور", NORM: "حضور", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "حمایتش": [ - {ORTH: "حمایت", LEMMA: "حمایت", NORM: "حمایت", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "حواسش": [ - {ORTH: "حواس", LEMMA: "حواس", NORM: "حواس", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "حواسشان": [ - {ORTH: "حواس", LEMMA: "حواس", NORM: "حواس", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "حوصله‌مان": [ - {ORTH: "حوصله‌", LEMMA: "حوصله‌", NORM: "حوصله‌", TAG: "NOUN"}, - {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"}, - ], - "حکومتش": [ - {ORTH: "حکومت", LEMMA: "حکومت", NORM: "حکومت", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "حکومتشان": [ - {ORTH: "حکومت", LEMMA: "حکومت", NORM: "حکومت", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "حیفم": [ - {ORTH: "حیف", LEMMA: "حیف", NORM: "حیف", TAG: "NOUN"}, - {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"}, - ], - "خاندانش": [ - {ORTH: "خاندان", LEMMA: "خاندان", NORM: "خاندان", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "خانه‌اش": [ - {ORTH: "خانه‌", LEMMA: "خانه‌", NORM: "خانه‌", TAG: "NOUN"}, - {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"}, - ], - "خانه‌شان": [ - {ORTH: "خانه‌", LEMMA: "خانه‌", NORM: "خانه‌", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: 
"NOUN"}, - ], - "خانه‌مان": [ - {ORTH: "خانه‌", LEMMA: "خانه‌", NORM: "خانه‌", TAG: "NOUN"}, - {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"}, - ], - "خانه‌هایشان": [ - {ORTH: "خانه‌های", LEMMA: "خانه‌های", NORM: "خانه‌های", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "خانواده‌ات": [ - {ORTH: "خانواده", LEMMA: "خانواده", NORM: "خانواده", TAG: "NOUN"}, - {ORTH: "‌ات", LEMMA: "ات", NORM: "ات", TAG: "NOUN"}, - ], - "خانواده‌اش": [ - {ORTH: "خانواده‌", LEMMA: "خانواده‌", NORM: "خانواده‌", TAG: "NOUN"}, - {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"}, - ], - "خانواده‌ام": [ - {ORTH: "خانواده‌", LEMMA: "خانواده‌", NORM: "خانواده‌", TAG: "NOUN"}, - {ORTH: "ام", LEMMA: "ام", NORM: "ام", TAG: "NOUN"}, - ], - "خانواده‌شان": [ - {ORTH: "خانواده‌", LEMMA: "خانواده‌", NORM: "خانواده‌", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "خداست": [ - {ORTH: "خدا", LEMMA: "خدا", NORM: "خدا", TAG: "NOUN"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "خدایش": [ - {ORTH: "خدا", LEMMA: "خدا", NORM: "خدا", TAG: "NOUN"}, - {ORTH: "یش", LEMMA: "یش", NORM: "یش", TAG: "NOUN"}, - ], - "خدایشان": [ - {ORTH: "خدای", LEMMA: "خدای", NORM: "خدای", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "خردسالش": [ - {ORTH: "خردسال", LEMMA: "خردسال", NORM: "خردسال", TAG: "ADJ"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "خروپفشان": [ - {ORTH: "خروپف", LEMMA: "خروپف", NORM: "خروپف", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "خسته‌ای": [ - {ORTH: "خسته‌", LEMMA: "خسته‌", NORM: "خسته‌", TAG: "ADJ"}, - {ORTH: "ای", LEMMA: "ای", NORM: "ای", TAG: "VERB"}, - ], - "خطت": [ - {ORTH: "خط", LEMMA: "خط", NORM: "خط", TAG: "NOUN"}, - {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"}, - ], - "خوابمان": [ - {ORTH: "خواب", LEMMA: "خواب", NORM: "خواب", TAG: "NOUN"}, - {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"}, - ], - "خواندنش": [ - {ORTH: 
"خواندن", LEMMA: "خواندن", NORM: "خواندن", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "خواهرش": [ - {ORTH: "خواهر", LEMMA: "خواهر", NORM: "خواهر", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "خوبش": [ - {ORTH: "خوب", LEMMA: "خوب", NORM: "خوب", TAG: "ADJ"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "خودت": [ - {ORTH: "خود", LEMMA: "خود", NORM: "خود", TAG: "NOUN"}, - {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"}, - ], - "خودتان": [ - {ORTH: "خود", LEMMA: "خود", NORM: "خود", TAG: "NOUN"}, - {ORTH: "تان", LEMMA: "تان", NORM: "تان", TAG: "NOUN"}, - ], - "خودش": [ - {ORTH: "خود", LEMMA: "خود", NORM: "خود", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "خودشان": [ - {ORTH: "خود", LEMMA: "خود", NORM: "خود", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "خودمان": [ - {ORTH: "خود", LEMMA: "خود", NORM: "خود", TAG: "NOUN"}, - {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"}, - ], - "خوردمان": [ - {ORTH: "خورد", LEMMA: "خورد", NORM: "خورد", TAG: "NOUN"}, - {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"}, - ], - "خوردنشان": [ - {ORTH: "خوردن", LEMMA: "خوردن", NORM: "خوردن", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "خوشش": [ - {ORTH: "خوش", LEMMA: "خوش", NORM: "خوش", TAG: "ADJ"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "خوشوقتم": [ - {ORTH: "خوشوقت", LEMMA: "خوشوقت", NORM: "خوشوقت", TAG: "ADJ"}, - {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "VERB"}, - ], - "خونشان": [ - {ORTH: "خون", LEMMA: "خون", NORM: "خون", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "خویش": [ - {ORTH: "خوی", LEMMA: "خوی", NORM: "خوی", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "خویشتنم": [ - {ORTH: "خویشتن", LEMMA: "خویشتن", NORM: "خویشتن", TAG: "VERB"}, - {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"}, - ], - "خیالش": [ - {ORTH: "خیال", LEMMA: "خیال", 
NORM: "خیال", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "خیسش": [ - {ORTH: "خیس", LEMMA: "خیس", NORM: "خیس", TAG: "ADJ"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "داراست": [ - {ORTH: "دارا", LEMMA: "دارا", NORM: "دارا", TAG: "ADJ"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "داستانهایش": [ - {ORTH: "داستانهای", LEMMA: "داستانهای", NORM: "داستانهای", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "دخترمان": [ - {ORTH: "دختر", LEMMA: "دختر", NORM: "دختر", TAG: "NOUN"}, - {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"}, - ], - "دخیلند": [ - {ORTH: "دخیل", LEMMA: "دخیل", NORM: "دخیل", TAG: "ADJ"}, - {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"}, - ], - "درباره‌ات": [ - {ORTH: "درباره", LEMMA: "درباره", NORM: "درباره", TAG: "ADP"}, - {ORTH: "‌ات", LEMMA: "ات", NORM: "ات", TAG: "NOUN"}, - ], - "درباره‌اش": [ - {ORTH: "درباره‌", LEMMA: "درباره‌", NORM: "درباره‌", TAG: "ADP"}, - {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"}, - ], - "دردش": [ - {ORTH: "درد", LEMMA: "درد", NORM: "درد", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "دردشان": [ - {ORTH: "درد", LEMMA: "درد", NORM: "درد", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "درسته": [ - {ORTH: "درست", LEMMA: "درست", NORM: "درست", TAG: "ADJ"}, - {ORTH: "ه", LEMMA: "ه", NORM: "ه", TAG: "VERB"}, - ], - "درش": [ - {ORTH: "در", LEMMA: "در", NORM: "در", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "درون‌شان": [ - {ORTH: "درون‌", LEMMA: "درون‌", NORM: "درون‌", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "درین": [ - {ORTH: "در", LEMMA: "در", NORM: "در", TAG: "ADP"}, - {ORTH: "ین", LEMMA: "ین", NORM: "ین", TAG: "NOUN"}, - ], - "دریچه‌هایش": [ - {ORTH: "دریچه‌های", LEMMA: "دریچه‌های", NORM: "دریچه‌های", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "دزدانش": [ - {ORTH: "دزدان", 
LEMMA: "دزدان", NORM: "دزدان", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "دستت": [ - {ORTH: "دست", LEMMA: "دست", NORM: "دست", TAG: "NOUN"}, - {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"}, - ], - "دستش": [ - {ORTH: "دست", LEMMA: "دست", NORM: "دست", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "دستمان": [ - {ORTH: "دست", LEMMA: "دست", NORM: "دست", TAG: "NOUN"}, - {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"}, - ], - "دستهایشان": [ - {ORTH: "دستهای", LEMMA: "دستهای", NORM: "دستهای", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "دست‌یافتنی‌ست": [ - { - ORTH: "دست‌یافتنی‌", - LEMMA: "دست‌یافتنی‌", - NORM: "دست‌یافتنی‌", - TAG: "ADJ", - }, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "دشمنند": [ - {ORTH: "دشمن", LEMMA: "دشمن", NORM: "دشمن", TAG: "NOUN"}, - {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"}, - ], - "دشمنیشان": [ - {ORTH: "دشمنی", LEMMA: "دشمنی", NORM: "دشمنی", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "دشمنیم": [ - {ORTH: "دشمن", LEMMA: "دشمن", NORM: "دشمن", TAG: "NOUN"}, - {ORTH: "یم", LEMMA: "یم", NORM: "یم", TAG: "VERB"}, - ], - "دفترش": [ - {ORTH: "دفتر", LEMMA: "دفتر", NORM: "دفتر", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "دفنشان": [ - {ORTH: "دفن", LEMMA: "دفن", NORM: "دفن", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "دلت": [ - {ORTH: "دل", LEMMA: "دل", NORM: "دل", TAG: "NOUN"}, - {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"}, - ], - "دلش": [ - {ORTH: "دل", LEMMA: "دل", NORM: "دل", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "دلشان": [ - {ORTH: "دل", LEMMA: "دل", NORM: "دل", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "دلم": [ - {ORTH: "دل", LEMMA: "دل", NORM: "دل", TAG: "NOUN"}, - {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"}, - ], - "دلیلش": [ - {ORTH: "دلیل", LEMMA: 
"دلیل", NORM: "دلیل", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "دنبالش": [ - {ORTH: "دنبال", LEMMA: "دنبال", NORM: "دنبال", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "دنباله‌اش": [ - {ORTH: "دنباله‌", LEMMA: "دنباله‌", NORM: "دنباله‌", TAG: "NOUN"}, - {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"}, - ], - "دهاتی‌هایش": [ - {ORTH: "دهاتی‌های", LEMMA: "دهاتی‌های", NORM: "دهاتی‌های", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "دهانت": [ - {ORTH: "دهان", LEMMA: "دهان", NORM: "دهان", TAG: "NOUN"}, - {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"}, - ], - "دهنش": [ - {ORTH: "دهن", LEMMA: "دهن", NORM: "دهن", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "دورش": [ - {ORTH: "دور", LEMMA: "دور", NORM: "دور", TAG: "ADV"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "دوروبریهاشان": [ - {ORTH: "دوروبریها", LEMMA: "دوروبریها", NORM: "دوروبریها", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "دوستانش": [ - {ORTH: "دوستان", LEMMA: "دوستان", NORM: "دوستان", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "دوستانشان": [ - {ORTH: "دوستان", LEMMA: "دوستان", NORM: "دوستان", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "دوستت": [ - {ORTH: "دوست", LEMMA: "دوست", NORM: "دوست", TAG: "NOUN"}, - {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"}, - ], - "دوستش": [ - {ORTH: "دوست", LEMMA: "دوست", NORM: "دوست", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "دومش": [ - {ORTH: "دوم", LEMMA: "دوم", NORM: "دوم", TAG: "ADJ"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "دویدنش": [ - {ORTH: "دویدن", LEMMA: "دویدن", NORM: "دویدن", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "دکورهایمان": [ - {ORTH: "دکورهای", LEMMA: "دکورهای", NORM: "دکورهای", TAG: "NOUN"}, - {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"}, - ], 
- "دیدگاهش": [ - {ORTH: "دیدگاه", LEMMA: "دیدگاه", NORM: "دیدگاه", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "دیرت": [ - {ORTH: "دیر", LEMMA: "دیر", NORM: "دیر", TAG: "ADV"}, - {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"}, - ], - "دیرم": [ - {ORTH: "دیر", LEMMA: "دیر", NORM: "دیر", TAG: "ADV"}, - {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"}, - ], - "دینت": [ - {ORTH: "دین", LEMMA: "دین", NORM: "دین", TAG: "NOUN"}, - {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"}, - ], - "دینش": [ - {ORTH: "دین", LEMMA: "دین", NORM: "دین", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "دین‌شان": [ - {ORTH: "دین‌", LEMMA: "دین‌", NORM: "دین‌", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "دیواره‌هایش": [ - {ORTH: "دیواره‌های", LEMMA: "دیواره‌های", NORM: "دیواره‌های", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "دیوانه‌ای": [ - {ORTH: "دیوانه‌", LEMMA: "دیوانه‌", NORM: "دیوانه‌", TAG: "ADJ"}, - {ORTH: "ای", LEMMA: "ای", NORM: "ای", TAG: "VERB"}, - ], - "دیوی": [ - {ORTH: "دیو", LEMMA: "دیو", NORM: "دیو", TAG: "NOUN"}, - {ORTH: "ی", LEMMA: "ی", NORM: "ی", TAG: "VERB"}, - ], - "دیگرم": [ - {ORTH: "دیگر", LEMMA: "دیگر", NORM: "دیگر", TAG: "ADJ"}, - {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"}, - ], - "دیگرمان": [ - {ORTH: "دیگر", LEMMA: "دیگر", NORM: "دیگر", TAG: "ADJ"}, - {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"}, - ], - "ذهنش": [ - {ORTH: "ذهن", LEMMA: "ذهن", NORM: "ذهن", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "ذهنشان": [ - {ORTH: "ذهن", LEMMA: "ذهن", NORM: "ذهن", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "ذهنم": [ - {ORTH: "ذهن", LEMMA: "ذهن", NORM: "ذهن", TAG: "NOUN"}, - {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"}, - ], - "رئوسش": [ - {ORTH: "رئوس", LEMMA: "رئوس", NORM: "رئوس", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "راهشان": [ - {ORTH: "راه", 
LEMMA: "راه", NORM: "راه", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "راهگشاست": [ - {ORTH: "راهگشا", LEMMA: "راهگشا", NORM: "راهگشا", TAG: "NOUN"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "رایانه‌هایشان": [ - {ORTH: "رایانه‌های", LEMMA: "رایانه‌های", NORM: "رایانه‌های", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "رعایتشان": [ - {ORTH: "رعایت", LEMMA: "رعایت", NORM: "رعایت", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "رفتارش": [ - {ORTH: "رفتار", LEMMA: "رفتار", NORM: "رفتار", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "رفتارشان": [ - {ORTH: "رفتار", LEMMA: "رفتار", NORM: "رفتار", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "رفتارمان": [ - {ORTH: "رفتار", LEMMA: "رفتار", NORM: "رفتار", TAG: "NOUN"}, - {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"}, - ], - "رفتارهاست": [ - {ORTH: "رفتارها", LEMMA: "رفتارها", NORM: "رفتارها", TAG: "NOUN"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "رفتارهایشان": [ - {ORTH: "رفتارهای", LEMMA: "رفتارهای", NORM: "رفتارهای", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "رفقایم": [ - {ORTH: "رفقا", LEMMA: "رفقا", NORM: "رفقا", TAG: "NOUN"}, - {ORTH: "یم", LEMMA: "یم", NORM: "یم", TAG: "NOUN"}, - ], - "رقیق‌ترش": [ - {ORTH: "رقیق‌تر", LEMMA: "رقیق‌تر", NORM: "رقیق‌تر", TAG: "ADJ"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "رنجند": [ - {ORTH: "رنج", LEMMA: "رنج", NORM: "رنج", TAG: "NOUN"}, - {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"}, - ], - "رهگشاست": [ - {ORTH: "رهگشا", LEMMA: "رهگشا", NORM: "رهگشا", TAG: "ADJ"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "رواست": [ - {ORTH: "روا", LEMMA: "روا", NORM: "روا", TAG: "ADJ"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "روبروست": [ - {ORTH: "روبرو", LEMMA: "روبرو", NORM: "روبرو", TAG: 
"ADJ"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "روحی‌اش": [ - {ORTH: "روحی‌", LEMMA: "روحی‌", NORM: "روحی‌", TAG: "ADJ"}, - {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"}, - ], - "روزنامه‌اش": [ - {ORTH: "روزنامه‌", LEMMA: "روزنامه‌", NORM: "روزنامه‌", TAG: "NOUN"}, - {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"}, - ], - "روزه‌ست": [ - {ORTH: "روزه‌", LEMMA: "روزه‌", NORM: "روزه‌", TAG: "NOUN"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "روسری‌اش": [ - {ORTH: "روسری‌", LEMMA: "روسری‌", NORM: "روسری‌", TAG: "NOUN"}, - {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"}, - ], - "روشتان": [ - {ORTH: "روش", LEMMA: "روش", NORM: "روش", TAG: "NOUN"}, - {ORTH: "تان", LEMMA: "تان", NORM: "تان", TAG: "NOUN"}, - ], - "رویش": [ - {ORTH: "روی", LEMMA: "روی", NORM: "روی", TAG: "ADP"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "زبانش": [ - {ORTH: "زبان", LEMMA: "زبان", NORM: "زبان", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "زحماتشان": [ - {ORTH: "زحمات", LEMMA: "زحمات", NORM: "زحمات", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "زدنهایشان": [ - {ORTH: "زدنهای", LEMMA: "زدنهای", NORM: "زدنهای", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "زرنگشان": [ - {ORTH: "زرنگ", LEMMA: "زرنگ", NORM: "زرنگ", TAG: "ADJ"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "زشتش": [ - {ORTH: "زشت", LEMMA: "زشت", NORM: "زشت", TAG: "ADJ"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "زشتکارانند": [ - {ORTH: "زشتکاران", LEMMA: "زشتکاران", NORM: "زشتکاران", TAG: "NOUN"}, - {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"}, - ], - "زلفش": [ - {ORTH: "زلف", LEMMA: "زلف", NORM: "زلف", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "زمن": [ - {ORTH: "ز", LEMMA: "ز", NORM: "ز", TAG: "ADP"}, - {ORTH: "من", LEMMA: "من", NORM: "من", TAG: "NOUN"}, - ], - "زنبوری‌اش": [ - {ORTH: "زنبوری‌", LEMMA: 
"زنبوری‌", NORM: "زنبوری‌", TAG: "ADJ"}, - {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"}, - ], - "زندانم": [ - {ORTH: "زندان", LEMMA: "زندان", NORM: "زندان", TAG: "NOUN"}, - {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"}, - ], - "زنده‌ام": [ - {ORTH: "زنده‌", LEMMA: "زنده‌", NORM: "زنده‌", TAG: "ADJ"}, - {ORTH: "ام", LEMMA: "ام", NORM: "ام", TAG: "VERB"}, - ], - "زندگانی‌اش": [ - {ORTH: "زندگانی‌", LEMMA: "زندگانی‌", NORM: "زندگانی‌", TAG: "NOUN"}, - {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"}, - ], - "زندگی‌اش": [ - {ORTH: "زندگی‌", LEMMA: "زندگی‌", NORM: "زندگی‌", TAG: "NOUN"}, - {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"}, - ], - "زندگی‌ام": [ - {ORTH: "زندگی‌", LEMMA: "زندگی‌", NORM: "زندگی‌", TAG: "NOUN"}, - {ORTH: "ام", LEMMA: "ام", NORM: "ام", TAG: "NOUN"}, - ], - "زندگی‌شان": [ - {ORTH: "زندگی‌", LEMMA: "زندگی‌", NORM: "زندگی‌", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "زنش": [ - {ORTH: "زن", LEMMA: "زن", NORM: "زن", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "زنند": [ - {ORTH: "زن", LEMMA: "زن", NORM: "زن", TAG: "NOUN"}, - {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"}, - ], - "زو": [ - {ORTH: "ز", LEMMA: "ز", NORM: "ز", TAG: "ADP"}, - {ORTH: "و", LEMMA: "و", NORM: "و", TAG: "NOUN"}, - ], - "زیاده": [ - {ORTH: "زیاد", LEMMA: "زیاد", NORM: "زیاد", TAG: "ADJ"}, - {ORTH: "ه", LEMMA: "ه", NORM: "ه", TAG: "VERB"}, - ], - "زیباست": [ - {ORTH: "زیبا", LEMMA: "زیبا", NORM: "زیبا", TAG: "ADJ"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "زیبایش": [ - {ORTH: "زیبای", LEMMA: "زیبای", NORM: "زیبای", TAG: "ADJ"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "زیبایی": [ - {ORTH: "زیبای", LEMMA: "زیبای", NORM: "زیبای", TAG: "ADJ"}, - {ORTH: "ی", LEMMA: "ی", NORM: "ی", TAG: "VERB"}, - ], - "زیربناست": [ - {ORTH: "زیربنا", LEMMA: "زیربنا", NORM: "زیربنا", TAG: "NOUN"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "زیرک‌اند": [ - {ORTH: 
"زیرک‌", LEMMA: "زیرک‌", NORM: "زیرک‌", TAG: "ADJ"}, - {ORTH: "اند", LEMMA: "اند", NORM: "اند", TAG: "VERB"}, - ], - "سؤالتان": [ - {ORTH: "سؤال", LEMMA: "سؤال", NORM: "سؤال", TAG: "NOUN"}, - {ORTH: "تان", LEMMA: "تان", NORM: "تان", TAG: "NOUN"}, - ], - "سؤالم": [ - {ORTH: "سؤال", LEMMA: "سؤال", NORM: "سؤال", TAG: "NOUN"}, - {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"}, - ], - "سابقه‌اش": [ - {ORTH: "سابقه‌", LEMMA: "سابقه‌", NORM: "سابقه‌", TAG: "NOUN"}, - {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"}, - ], - "ساختنم": [ - {ORTH: "ساختن", LEMMA: "ساختن", NORM: "ساختن", TAG: "NOUN"}, - {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"}, - ], - "ساده‌اش": [ - {ORTH: "ساده‌", LEMMA: "ساده‌", NORM: "ساده‌", TAG: "ADJ"}, - {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"}, - ], - "ساده‌اند": [ - {ORTH: "ساده‌", LEMMA: "ساده‌", NORM: "ساده‌", TAG: "ADJ"}, - {ORTH: "اند", LEMMA: "اند", NORM: "اند", TAG: "VERB"}, - ], - "سازمانش": [ - {ORTH: "سازمان", LEMMA: "سازمان", NORM: "سازمان", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "ساعتم": [ - {ORTH: "ساعت", LEMMA: "ساعت", NORM: "ساعت", TAG: "NOUN"}, - {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"}, - ], - "سالته": [ - {ORTH: "سال", LEMMA: "سال", NORM: "سال", TAG: "NOUN"}, - {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"}, - {ORTH: "ه", LEMMA: "ه", NORM: "ه", TAG: "VERB"}, - ], - "سالش": [ - {ORTH: "سال", LEMMA: "سال", NORM: "سال", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "سالهاست": [ - {ORTH: "سالها", LEMMA: "سالها", NORM: "سالها", TAG: "NOUN"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "ساله‌اش": [ - {ORTH: "ساله‌", LEMMA: "ساله‌", NORM: "ساله‌", TAG: "ADJ"}, - {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"}, - ], - "ساکتند": [ - {ORTH: "ساکت", LEMMA: "ساکت", NORM: "ساکت", TAG: "ADJ"}, - {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"}, - ], - "ساکنند": [ - {ORTH: "ساکن", LEMMA: "ساکن", NORM: "ساکن", TAG: "ADJ"}, - {ORTH: "ند", LEMMA: 
"ند", NORM: "ند", TAG: "VERB"}, - ], - "سبزشان": [ - {ORTH: "سبز", LEMMA: "سبز", NORM: "سبز", TAG: "ADJ"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "سبیل‌مان": [ - {ORTH: "سبیل‌", LEMMA: "سبیل‌", NORM: "سبیل‌", TAG: "NOUN"}, - {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"}, - ], - "ستم‌هایش": [ - {ORTH: "ستم‌های", LEMMA: "ستم‌های", NORM: "ستم‌های", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "سخنانش": [ - {ORTH: "سخنان", LEMMA: "سخنان", NORM: "سخنان", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "سخنانشان": [ - {ORTH: "سخنان", LEMMA: "سخنان", NORM: "سخنان", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "سخنتان": [ - {ORTH: "سخن", LEMMA: "سخن", NORM: "سخن", TAG: "NOUN"}, - {ORTH: "تان", LEMMA: "تان", NORM: "تان", TAG: "NOUN"}, - ], - "سخنش": [ - {ORTH: "سخن", LEMMA: "سخن", NORM: "سخن", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "سخنم": [ - {ORTH: "سخن", LEMMA: "سخن", NORM: "سخن", TAG: "NOUN"}, - {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"}, - ], - "سردش": [ - {ORTH: "سرد", LEMMA: "سرد", NORM: "سرد", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "سرزمینشان": [ - {ORTH: "سرزمین", LEMMA: "سرزمین", NORM: "سرزمین", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "سرش": [ - {ORTH: "سر", LEMMA: "سر", NORM: "سر", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "سرمایه‌دارهاست": [ - { - ORTH: "سرمایه‌دارها", - LEMMA: "سرمایه‌دارها", - NORM: "سرمایه‌دارها", - TAG: "NOUN", - }, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "سرنوشتش": [ - {ORTH: "سرنوشت", LEMMA: "سرنوشت", NORM: "سرنوشت", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "سرنوشتشان": [ - {ORTH: "سرنوشت", LEMMA: "سرنوشت", NORM: "سرنوشت", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "سروتهش": [ - {ORTH: "سروته", LEMMA: 
"سروته", NORM: "سروته", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "سرچشمه‌اش": [ - {ORTH: "سرچشمه‌", LEMMA: "سرچشمه‌", NORM: "سرچشمه‌", TAG: "NOUN"}, - {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"}, - ], - "سقمش": [ - {ORTH: "سقم", LEMMA: "سقم", NORM: "سقم", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "سنش": [ - {ORTH: "سن", LEMMA: "سن", NORM: "سن", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "سپاهش": [ - {ORTH: "سپاه", LEMMA: "سپاه", NORM: "سپاه", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "سیاسیشان": [ - {ORTH: "سیاسی", LEMMA: "سیاسی", NORM: "سیاسی", TAG: "ADJ"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "سیاه‌چاله‌هاست": [ - { - ORTH: "سیاه‌چاله‌ها", - LEMMA: "سیاه‌چاله‌ها", - NORM: "سیاه‌چاله‌ها", - TAG: "NOUN", - }, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "شاخه‌هایشان": [ - {ORTH: "شاخه‌های", LEMMA: "شاخه‌های", NORM: "شاخه‌های", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "شالوده‌اش": [ - {ORTH: "شالوده‌", LEMMA: "شالوده‌", NORM: "شالوده‌", TAG: "NOUN"}, - {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"}, - ], - "شانه‌هایش": [ - {ORTH: "شانه‌های", LEMMA: "شانه‌های", NORM: "شانه‌های", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "شاهدیم": [ - {ORTH: "شاهد", LEMMA: "شاهد", NORM: "شاهد", TAG: "NOUN"}, - {ORTH: "یم", LEMMA: "یم", NORM: "یم", TAG: "VERB"}, - ], - "شاهکارهایش": [ - {ORTH: "شاهکارهای", LEMMA: "شاهکارهای", NORM: "شاهکارهای", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "شخصیتش": [ - {ORTH: "شخصیت", LEMMA: "شخصیت", NORM: "شخصیت", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "شدنشان": [ - {ORTH: "شدن", LEMMA: "شدن", NORM: "شدن", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "شرکتیست": [ - {ORTH: "شرکتی", LEMMA: "شرکتی", NORM: "شرکتی", TAG: 
"NOUN"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "شعارهاشان": [ - {ORTH: "شعارها", LEMMA: "شعارها", NORM: "شعارها", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "شعورش": [ - {ORTH: "شعور", LEMMA: "شعور", NORM: "شعور", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "شغلش": [ - {ORTH: "شغل", LEMMA: "شغل", NORM: "شغل", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "شماست": [ - {ORTH: "شما", LEMMA: "شما", NORM: "شما", TAG: "NOUN"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "شمشیرش": [ - {ORTH: "شمشیر", LEMMA: "شمشیر", NORM: "شمشیر", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "شنیدنش": [ - {ORTH: "شنیدن", LEMMA: "شنیدن", NORM: "شنیدن", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "شوراست": [ - {ORTH: "شورا", LEMMA: "شورا", NORM: "شورا", TAG: "NOUN"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "شومت": [ - {ORTH: "شوم", LEMMA: "شوم", NORM: "شوم", TAG: "ADJ"}, - {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"}, - ], - "شیرینترش": [ - {ORTH: "شیرینتر", LEMMA: "شیرینتر", NORM: "شیرینتر", TAG: "ADJ"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "شیطان‌اند": [ - {ORTH: "شیطان‌", LEMMA: "شیطان‌", NORM: "شیطان‌", TAG: "NOUN"}, - {ORTH: "اند", LEMMA: "اند", NORM: "اند", TAG: "VERB"}, - ], - "شیوه‌هاست": [ - {ORTH: "شیوه‌ها", LEMMA: "شیوه‌ها", NORM: "شیوه‌ها", TAG: "NOUN"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "صاحبش": [ - {ORTH: "صاحب", LEMMA: "صاحب", NORM: "صاحب", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "صحنه‌اش": [ - {ORTH: "صحنه‌", LEMMA: "صحنه‌", NORM: "صحنه‌", TAG: "NOUN"}, - {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"}, - ], - "صدایش": [ - {ORTH: "صدای", LEMMA: "صدای", NORM: "صدای", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "صددند": [ - {ORTH: "صدد", LEMMA: "صدد", NORM: "صدد", 
TAG: "NOUN"}, - {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"}, - ], - "صندوق‌هاست": [ - {ORTH: "صندوق‌ها", LEMMA: "صندوق‌ها", NORM: "صندوق‌ها", TAG: "NOUN"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "صندوق‌هایش": [ - {ORTH: "صندوق‌های", LEMMA: "صندوق‌های", NORM: "صندوق‌های", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "صورتش": [ - {ORTH: "صورت", LEMMA: "صورت", NORM: "صورت", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "ضروری‌اند": [ - {ORTH: "ضروری‌", LEMMA: "ضروری‌", NORM: "ضروری‌", TAG: "ADJ"}, - {ORTH: "اند", LEMMA: "اند", NORM: "اند", TAG: "VERB"}, - ], - "ضمیرش": [ - {ORTH: "ضمیر", LEMMA: "ضمیر", NORM: "ضمیر", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "طرفش": [ - {ORTH: "طرف", LEMMA: "طرف", NORM: "طرف", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "طلسمش": [ - {ORTH: "طلسم", LEMMA: "طلسم", NORM: "طلسم", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "طوره": [ - {ORTH: "طور", LEMMA: "طور", NORM: "طور", TAG: "NOUN"}, - {ORTH: "ه", LEMMA: "ه", NORM: "ه", TAG: "VERB"}, - ], - "عاشوراست": [ - {ORTH: "عاشورا", LEMMA: "عاشورا", NORM: "عاشورا", TAG: "NOUN"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "عبارتند": [ - {ORTH: "عبارت", LEMMA: "عبارت", NORM: "عبارت", TAG: "NOUN"}, - {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"}, - ], - "عزیزانتان": [ - {ORTH: "عزیزان", LEMMA: "عزیزان", NORM: "عزیزان", TAG: "NOUN"}, - {ORTH: "تان", LEMMA: "تان", NORM: "تان", TAG: "NOUN"}, - ], - "عزیزانش": [ - {ORTH: "عزیزان", LEMMA: "عزیزان", NORM: "عزیزان", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "عزیزش": [ - {ORTH: "عزیز", LEMMA: "عزیز", NORM: "عزیز", TAG: "ADJ"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "عشرت‌طلبی‌اش": [ - {ORTH: "عشرت‌طلبی‌", LEMMA: "عشرت‌طلبی‌", NORM: "عشرت‌طلبی‌", TAG: "NOUN"}, - {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"}, - ], - 
"عقبیم": [ - {ORTH: "عقب", LEMMA: "عقب", NORM: "عقب", TAG: "NOUN"}, - {ORTH: "یم", LEMMA: "یم", NORM: "یم", TAG: "VERB"}, - ], - "علاقه‌اش": [ - {ORTH: "علاقه‌", LEMMA: "علاقه‌", NORM: "علاقه‌", TAG: "NOUN"}, - {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"}, - ], - "علمیمان": [ - {ORTH: "علمی", LEMMA: "علمی", NORM: "علمی", TAG: "ADJ"}, - {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"}, - ], - "عمرش": [ - {ORTH: "عمر", LEMMA: "عمر", NORM: "عمر", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "عمرشان": [ - {ORTH: "عمر", LEMMA: "عمر", NORM: "عمر", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "عملش": [ - {ORTH: "عمل", LEMMA: "عمل", NORM: "عمل", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "عملی‌اند": [ - {ORTH: "عملی‌", LEMMA: "عملی‌", NORM: "عملی‌", TAG: "ADJ"}, - {ORTH: "اند", LEMMA: "اند", NORM: "اند", TAG: "VERB"}, - ], - "عمویت": [ - {ORTH: "عموی", LEMMA: "عموی", NORM: "عموی", TAG: "NOUN"}, - {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"}, - ], - "عمویش": [ - {ORTH: "عموی", LEMMA: "عموی", NORM: "عموی", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "عمیقش": [ - {ORTH: "عمیق", LEMMA: "عمیق", NORM: "عمیق", TAG: "ADJ"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "عواملش": [ - {ORTH: "عوامل", LEMMA: "عوامل", NORM: "عوامل", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "عوضشان": [ - {ORTH: "عوض", LEMMA: "عوض", NORM: "عوض", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "غذایی‌شان": [ - {ORTH: "غذایی‌", LEMMA: "غذایی‌", NORM: "غذایی‌", TAG: "ADJ"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "غریبه‌اند": [ - {ORTH: "غریبه‌", LEMMA: "غریبه‌", NORM: "غریبه‌", TAG: "NOUN"}, - {ORTH: "اند", LEMMA: "اند", NORM: "اند", TAG: "VERB"}, - ], - "غلامانش": [ - {ORTH: "غلامان", LEMMA: "غلامان", NORM: "غلامان", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, 
- ], - "غلطهاست": [ - {ORTH: "غلطها", LEMMA: "غلطها", NORM: "غلطها", TAG: "NOUN"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "فراموشتان": [ - {ORTH: "فراموش", LEMMA: "فراموش", NORM: "فراموش", TAG: "ADJ"}, - {ORTH: "تان", LEMMA: "تان", NORM: "تان", TAG: "NOUN"}, - ], - "فردی‌اند": [ - {ORTH: "فردی‌", LEMMA: "فردی‌", NORM: "فردی‌", TAG: "ADJ"}, - {ORTH: "اند", LEMMA: "اند", NORM: "اند", TAG: "VERB"}, - ], - "فرزندانش": [ - {ORTH: "فرزندان", LEMMA: "فرزندان", NORM: "فرزندان", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "فرزندش": [ - {ORTH: "فرزند", LEMMA: "فرزند", NORM: "فرزند", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "فرم‌هایش": [ - {ORTH: "فرم‌های", LEMMA: "فرم‌های", NORM: "فرم‌های", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "فرهنگی‌مان": [ - {ORTH: "فرهنگی‌", LEMMA: "فرهنگی‌", NORM: "فرهنگی‌", TAG: "ADJ"}, - {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"}, - ], - "فریادشان": [ - {ORTH: "فریاد", LEMMA: "فریاد", NORM: "فریاد", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "فضایی‌شان": [ - {ORTH: "فضایی‌", LEMMA: "فضایی‌", NORM: "فضایی‌", TAG: "ADJ"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "فقیرشان": [ - {ORTH: "فقیر", LEMMA: "فقیر", NORM: "فقیر", TAG: "ADJ"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "فوری‌شان": [ - {ORTH: "فوری‌", LEMMA: "فوری‌", NORM: "فوری‌", TAG: "ADJ"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "قائلند": [ - {ORTH: "قائل", LEMMA: "قائل", NORM: "قائل", TAG: "ADJ"}, - {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"}, - ], - "قائلیم": [ - {ORTH: "قائل", LEMMA: "قائل", NORM: "قائل", TAG: "ADJ"}, - {ORTH: "یم", LEMMA: "یم", NORM: "یم", TAG: "VERB"}, - ], - "قادرند": [ - {ORTH: "قادر", LEMMA: "قادر", NORM: "قادر", TAG: "ADJ"}, - {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"}, - ], - "قانونمندش": [ - {ORTH: "قانونمند", LEMMA: 
"قانونمند", NORM: "قانونمند", TAG: "ADJ"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "قبلند": [ - {ORTH: "قبل", LEMMA: "قبل", NORM: "قبل", TAG: "ADJ"}, - {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"}, - ], - "قبلی‌اش": [ - {ORTH: "قبلی‌", LEMMA: "قبلی‌", NORM: "قبلی‌", TAG: "ADJ"}, - {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"}, - ], - "قبلی‌مان": [ - {ORTH: "قبلی‌", LEMMA: "قبلی‌", NORM: "قبلی‌", TAG: "ADJ"}, - {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"}, - ], - "قدریست": [ - {ORTH: "قدری", LEMMA: "قدری", NORM: "قدری", TAG: "NOUN"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "قدمش": [ - {ORTH: "قدم", LEMMA: "قدم", NORM: "قدم", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "قسمتش": [ - {ORTH: "قسمت", LEMMA: "قسمت", NORM: "قسمت", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "قضایاست": [ - {ORTH: "قضایا", LEMMA: "قضایا", NORM: "قضایا", TAG: "NOUN"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "قضیه‌شان": [ - {ORTH: "قضیه‌", LEMMA: "قضیه‌", NORM: "قضیه‌", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "قهرمانهایشان": [ - {ORTH: "قهرمانهای", LEMMA: "قهرمانهای", NORM: "قهرمانهای", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "قهرمانیش": [ - {ORTH: "قهرمانی", LEMMA: "قهرمانی", NORM: "قهرمانی", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "قومت": [ - {ORTH: "قوم", LEMMA: "قوم", NORM: "قوم", TAG: "NOUN"}, - {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"}, - ], - "لازمه‌اش": [ - {ORTH: "لازمه‌", LEMMA: "لازمه‌", NORM: "لازمه‌", TAG: "NOUN"}, - {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"}, - ], - "مأموریتش": [ - {ORTH: "مأموریت", LEMMA: "مأموریت", NORM: "مأموریت", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "مأموریتم": [ - {ORTH: "مأموریت", LEMMA: "مأموریت", NORM: "مأموریت", TAG: "NOUN"}, - {ORTH: "م", LEMMA: "م", NORM: "م", TAG: 
"NOUN"}, - ], - "مأموریت‌اند": [ - {ORTH: "مأموریت‌", LEMMA: "مأموریت‌", NORM: "مأموریت‌", TAG: "NOUN"}, - {ORTH: "اند", LEMMA: "اند", NORM: "اند", TAG: "VERB"}, - ], - "مادرانشان": [ - {ORTH: "مادران", LEMMA: "مادران", NORM: "مادران", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "مادرت": [ - {ORTH: "مادر", LEMMA: "مادر", NORM: "مادر", TAG: "NOUN"}, - {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"}, - ], - "مادرش": [ - {ORTH: "مادر", LEMMA: "مادر", NORM: "مادر", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "مادرم": [ - {ORTH: "مادر", LEMMA: "مادر", NORM: "مادر", TAG: "NOUN"}, - {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"}, - ], - "ماست": [ - {ORTH: "ما", LEMMA: "ما", NORM: "ما", TAG: "NOUN"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "مالی‌اش": [ - {ORTH: "مالی‌", LEMMA: "مالی‌", NORM: "مالی‌", TAG: "ADJ"}, - {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"}, - ], - "ماهیتش": [ - {ORTH: "ماهیت", LEMMA: "ماهیت", NORM: "ماهیت", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "مایی": [ - {ORTH: "ما", LEMMA: "ما", NORM: "ما", TAG: "NOUN"}, - {ORTH: "یی", LEMMA: "یی", NORM: "یی", TAG: "VERB"}, - ], - "مجازاتش": [ - {ORTH: "مجازات", LEMMA: "مجازات", NORM: "مجازات", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "مجبورند": [ - {ORTH: "مجبور", LEMMA: "مجبور", NORM: "مجبور", TAG: "ADJ"}, - {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"}, - ], - "محتاجند": [ - {ORTH: "محتاج", LEMMA: "محتاج", NORM: "محتاج", TAG: "ADJ"}, - {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"}, - ], - "محرمم": [ - {ORTH: "محرم", LEMMA: "محرم", NORM: "محرم", TAG: "NOUN"}, - {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "SCONJ"}, - ], - "محلش": [ - {ORTH: "محل", LEMMA: "محل", NORM: "محل", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "مخالفند": [ - {ORTH: "مخالف", LEMMA: "مخالف", NORM: "مخالف", TAG: "ADJ"}, - {ORTH: "ند", LEMMA: "ند", NORM: "ند", 
TAG: "VERB"}, - ], - "مخدرش": [ - {ORTH: "مخدر", LEMMA: "مخدر", NORM: "مخدر", TAG: "ADJ"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "مدتهاست": [ - {ORTH: "مدتها", LEMMA: "مدتها", NORM: "مدتها", TAG: "NOUN"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "مدرسه‌ات": [ - {ORTH: "مدرسه", LEMMA: "مدرسه", NORM: "مدرسه", TAG: "NOUN"}, - {ORTH: "‌ات", LEMMA: "ات", NORM: "ات", TAG: "NOUN"}, - ], - "مدرکم": [ - {ORTH: "مدرک", LEMMA: "مدرک", NORM: "مدرک", TAG: "NOUN"}, - {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"}, - ], - "مدیرانش": [ - {ORTH: "مدیران", LEMMA: "مدیران", NORM: "مدیران", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "مدیونم": [ - {ORTH: "مدیون", LEMMA: "مدیون", NORM: "مدیون", TAG: "ADJ"}, - {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "VERB"}, - ], - "مذهبی‌اند": [ - {ORTH: "مذهبی‌", LEMMA: "مذهبی‌", NORM: "مذهبی‌", TAG: "ADJ"}, - {ORTH: "اند", LEMMA: "اند", NORM: "اند", TAG: "VERB"}, - ], - "مرا": [ - {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"}, - {ORTH: "را", LEMMA: "را", NORM: "را", TAG: "PART"}, - ], - "مرادت": [ - {ORTH: "مراد", LEMMA: "مراد", NORM: "مراد", TAG: "NOUN"}, - {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"}, - ], - "مردمشان": [ - {ORTH: "مردم", LEMMA: "مردم", NORM: "مردم", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "مردمند": [ - {ORTH: "مردم", LEMMA: "مردم", NORM: "مردم", TAG: "NOUN"}, - {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"}, - ], - "مردم‌اند": [ - {ORTH: "مردم‌", LEMMA: "مردم‌", NORM: "مردم‌", TAG: "NOUN"}, - {ORTH: "اند", LEMMA: "اند", NORM: "اند", TAG: "VERB"}, - ], - "مرزشان": [ - {ORTH: "مرز", LEMMA: "مرز", NORM: "مرز", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "مرزهاشان": [ - {ORTH: "مرزها", LEMMA: "مرزها", NORM: "مرزها", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "مزدورش": [ - {ORTH: "مزدور", LEMMA: "مزدور", NORM: "مزدور", TAG: "ADJ"}, - {ORTH: "ش", LEMMA: 
"ش", NORM: "ش", TAG: "NOUN"}, - ], - "مسئولیتش": [ - {ORTH: "مسئولیت", LEMMA: "مسئولیت", NORM: "مسئولیت", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "مسائلش": [ - {ORTH: "مسائل", LEMMA: "مسائل", NORM: "مسائل", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "مستحضرید": [ - {ORTH: "مستحضر", LEMMA: "مستحضر", NORM: "مستحضر", TAG: "ADJ"}, - {ORTH: "ید", LEMMA: "ید", NORM: "ید", TAG: "VERB"}, - ], - "مسلمانم": [ - {ORTH: "مسلمان", LEMMA: "مسلمان", NORM: "مسلمان", TAG: "NOUN"}, - {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "VERB"}, - ], - "مسلمانند": [ - {ORTH: "مسلمان", LEMMA: "مسلمان", NORM: "مسلمان", TAG: "NOUN"}, - {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"}, - ], - "مشتریانش": [ - {ORTH: "مشتریان", LEMMA: "مشتریان", NORM: "مشتریان", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "مشتهایمان": [ - {ORTH: "مشتهای", LEMMA: "مشتهای", NORM: "مشتهای", TAG: "NOUN"}, - {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"}, - ], - "مشخصند": [ - {ORTH: "مشخص", LEMMA: "مشخص", NORM: "مشخص", TAG: "ADJ"}, - {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"}, - ], - "مشغولند": [ - {ORTH: "مشغول", LEMMA: "مشغول", NORM: "مشغول", TAG: "ADJ"}, - {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"}, - ], - "مشغولیم": [ - {ORTH: "مشغول", LEMMA: "مشغول", NORM: "مشغول", TAG: "ADJ"}, - {ORTH: "یم", LEMMA: "یم", NORM: "یم", TAG: "VERB"}, - ], - "مشهورش": [ - {ORTH: "مشهور", LEMMA: "مشهور", NORM: "مشهور", TAG: "ADJ"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "مشکلاتشان": [ - {ORTH: "مشکلات", LEMMA: "مشکلات", NORM: "مشکلات", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "مشکلم": [ - {ORTH: "مشکل", LEMMA: "مشکل", NORM: "مشکل", TAG: "NOUN"}, - {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"}, - ], - "مطمئنم": [ - {ORTH: "مطمئن", LEMMA: "مطمئن", NORM: "مطمئن", TAG: "ADJ"}, - {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "VERB"}, - ], - "معامله‌مان": [ - {ORTH: "معامله‌", LEMMA: 
"معامله‌", NORM: "معامله‌", TAG: "NOUN"}, - {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"}, - ], - "معتقدم": [ - {ORTH: "معتقد", LEMMA: "معتقد", NORM: "معتقد", TAG: "ADJ"}, - {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "VERB"}, - ], - "معتقدند": [ - {ORTH: "معتقد", LEMMA: "معتقد", NORM: "معتقد", TAG: "ADJ"}, - {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"}, - ], - "معتقدیم": [ - {ORTH: "معتقد", LEMMA: "معتقد", NORM: "معتقد", TAG: "ADJ"}, - {ORTH: "یم", LEMMA: "یم", NORM: "یم", TAG: "VERB"}, - ], - "معرفی‌اش": [ - {ORTH: "معرفی‌", LEMMA: "معرفی‌", NORM: "معرفی‌", TAG: "NOUN"}, - {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"}, - ], - "معروفش": [ - {ORTH: "معروف", LEMMA: "معروف", NORM: "معروف", TAG: "ADJ"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "معضلاتمان": [ - {ORTH: "معضلات", LEMMA: "معضلات", NORM: "معضلات", TAG: "NOUN"}, - {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"}, - ], - "معلمش": [ - {ORTH: "معلم", LEMMA: "معلم", NORM: "معلم", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "معنایش": [ - {ORTH: "معنای", LEMMA: "معنای", NORM: "معنای", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "مغزشان": [ - {ORTH: "مغز", LEMMA: "مغز", NORM: "مغز", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "مفیدند": [ - {ORTH: "مفید", LEMMA: "مفید", NORM: "مفید", TAG: "ADJ"}, - {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"}, - ], - "مقابلش": [ - {ORTH: "مقابل", LEMMA: "مقابل", NORM: "مقابل", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "مقاله‌اش": [ - {ORTH: "مقاله‌", LEMMA: "مقاله‌", NORM: "مقاله‌", TAG: "NOUN"}, - {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"}, - ], - "مقدمش": [ - {ORTH: "مقدم", LEMMA: "مقدم", NORM: "مقدم", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "مقرش": [ - {ORTH: "مقر", LEMMA: "مقر", NORM: "مقر", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "مقصدشان": [ - 
{ORTH: "مقصد", LEMMA: "مقصد", NORM: "مقصد", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "مقصرند": [ - {ORTH: "مقصر", LEMMA: "مقصر", NORM: "مقصر", TAG: "ADJ"}, - {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"}, - ], - "مقصودتان": [ - {ORTH: "مقصود", LEMMA: "مقصود", NORM: "مقصود", TAG: "NOUN"}, - {ORTH: "تان", LEMMA: "تان", NORM: "تان", TAG: "NOUN"}, - ], - "ملاقاتهایش": [ - {ORTH: "ملاقاتهای", LEMMA: "ملاقاتهای", NORM: "ملاقاتهای", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "ممکنشان": [ - {ORTH: "ممکن", LEMMA: "ممکن", NORM: "ممکن", TAG: "ADJ"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "ممیزیهاست": [ - {ORTH: "ممیزیها", LEMMA: "ممیزیها", NORM: "ممیزیها", TAG: "NOUN"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "منظورم": [ - {ORTH: "منظور", LEMMA: "منظور", NORM: "منظور", TAG: "NOUN"}, - {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"}, - ], - "منی": [ - {ORTH: "من", LEMMA: "من", NORM: "من", TAG: "NOUN"}, - {ORTH: "ی", LEMMA: "ی", NORM: "ی", TAG: "VERB"}, - ], - "منید": [ - {ORTH: "من", LEMMA: "من", NORM: "من", TAG: "NOUN"}, - {ORTH: "ید", LEMMA: "ید", NORM: "ید", TAG: "VERB"}, - ], - "مهربانش": [ - {ORTH: "مهربان", LEMMA: "مهربان", NORM: "مهربان", TAG: "ADJ"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "مهم‌اند": [ - {ORTH: "مهم‌", LEMMA: "مهم‌", NORM: "مهم‌", TAG: "ADJ"}, - {ORTH: "اند", LEMMA: "اند", NORM: "اند", TAG: "VERB"}, - ], - "مواجهند": [ - {ORTH: "مواجه", LEMMA: "مواجه", NORM: "مواجه", TAG: "ADJ"}, - {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"}, - ], - "مواجه‌اند": [ - {ORTH: "مواجه‌", LEMMA: "مواجه‌", NORM: "مواجه‌", TAG: "ADJ"}, - {ORTH: "اند", LEMMA: "اند", NORM: "اند", TAG: "VERB"}, - ], - "مواخذه‌ات": [ - {ORTH: "مواخذه", LEMMA: "مواخذه", NORM: "مواخذه", TAG: "NOUN"}, - {ORTH: "‌ات", LEMMA: "ات", NORM: "ات", TAG: "NOUN"}, - ], - "مواضعشان": [ - {ORTH: "مواضع", LEMMA: "مواضع", NORM: "مواضع", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: 
"شان", NORM: "شان", TAG: "NOUN"}, - ], - "مواضعمان": [ - {ORTH: "مواضع", LEMMA: "مواضع", NORM: "مواضع", TAG: "NOUN"}, - {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"}, - ], - "موافقند": [ - {ORTH: "موافق", LEMMA: "موافق", NORM: "موافق", TAG: "ADJ"}, - {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"}, - ], - "موجوداتش": [ - {ORTH: "موجودات", LEMMA: "موجودات", NORM: "موجودات", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "موجودند": [ - {ORTH: "موجود", LEMMA: "موجود", NORM: "موجود", TAG: "ADJ"}, - {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"}, - ], - "موردش": [ - {ORTH: "مورد", LEMMA: "مورد", NORM: "مورد", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "موضعشان": [ - {ORTH: "موضع", LEMMA: "موضع", NORM: "موضع", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "موظفند": [ - {ORTH: "موظف", LEMMA: "موظف", NORM: "موظف", TAG: "ADJ"}, - {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"}, - ], - "موهایش": [ - {ORTH: "موهای", LEMMA: "موهای", NORM: "موهای", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "موهایمان": [ - {ORTH: "موهای", LEMMA: "موهای", NORM: "موهای", TAG: "NOUN"}, - {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"}, - ], - "مویم": [ - {ORTH: "مو", LEMMA: "مو", NORM: "مو", TAG: "NOUN"}, - {ORTH: "یم", LEMMA: "یم", NORM: "یم", TAG: "NOUN"}, - ], - "ناخرسندند": [ - {ORTH: "ناخرسند", LEMMA: "ناخرسند", NORM: "ناخرسند", TAG: "ADJ"}, - {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"}, - ], - "ناراحتیش": [ - {ORTH: "ناراحتی", LEMMA: "ناراحتی", NORM: "ناراحتی", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "ناراضی‌اند": [ - {ORTH: "ناراضی‌", LEMMA: "ناراضی‌", NORM: "ناراضی‌", TAG: "ADJ"}, - {ORTH: "اند", LEMMA: "اند", NORM: "اند", TAG: "VERB"}, - ], - "نارواست": [ - {ORTH: "ناروا", LEMMA: "ناروا", NORM: "ناروا", TAG: "ADJ"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "نازش": [ - {ORTH: "ناز", LEMMA: 
"ناز", NORM: "ناز", TAG: "ADJ"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "نامش": [ - {ORTH: "نام", LEMMA: "نام", NORM: "نام", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "نامشان": [ - {ORTH: "نام", LEMMA: "نام", NORM: "نام", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "نامم": [ - {ORTH: "نام", LEMMA: "نام", NORM: "نام", TAG: "NOUN"}, - {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"}, - ], - "نامه‌ات": [ - {ORTH: "نامه", LEMMA: "نامه", NORM: "نامه", TAG: "NOUN"}, - {ORTH: "‌ات", LEMMA: "ات", NORM: "ات", TAG: "NOUN"}, - ], - "نامه‌ام": [ - {ORTH: "نامه‌", LEMMA: "نامه‌", NORM: "نامه‌", TAG: "NOUN"}, - {ORTH: "ام", LEMMA: "ام", NORM: "ام", TAG: "NOUN"}, - ], - "ناچارم": [ - {ORTH: "ناچار", LEMMA: "ناچار", NORM: "ناچار", TAG: "ADJ"}, - {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "VERB"}, - ], - "نخست‌وزیری‌اش": [ - { - ORTH: "نخست‌وزیری‌", - LEMMA: "نخست‌وزیری‌", - NORM: "نخست‌وزیری‌", - TAG: "NOUN", - }, - {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"}, - ], - "نزدش": [ - {ORTH: "نزد", LEMMA: "نزد", NORM: "نزد", TAG: "ADP"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "نشانم": [ - {ORTH: "نشان", LEMMA: "نشان", NORM: "نشان", TAG: "NOUN"}, - {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"}, - ], - "نظرات‌شان": [ - {ORTH: "نظرات‌", LEMMA: "نظرات‌", NORM: "نظرات‌", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "نظرتان": [ - {ORTH: "نظر", LEMMA: "نظر", NORM: "نظر", TAG: "NOUN"}, - {ORTH: "تان", LEMMA: "تان", NORM: "تان", TAG: "NOUN"}, - ], - "نظرش": [ - {ORTH: "نظر", LEMMA: "نظر", NORM: "نظر", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "نظرشان": [ - {ORTH: "نظر", LEMMA: "نظر", NORM: "نظر", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "نظرم": [ - {ORTH: "نظر", LEMMA: "نظر", NORM: "نظر", TAG: "NOUN"}, - {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"}, - ], - "نظرهایشان": [ - {ORTH: "نظرهای", 
LEMMA: "نظرهای", NORM: "نظرهای", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "نفاقش": [ - {ORTH: "نفاق", LEMMA: "نفاق", NORM: "نفاق", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "نفرند": [ - {ORTH: "نفر", LEMMA: "نفر", NORM: "نفر", TAG: "NOUN"}, - {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"}, - ], - "نفوذیند": [ - {ORTH: "نفوذی", LEMMA: "نفوذی", NORM: "نفوذی", TAG: "ADJ"}, - {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"}, - ], - "نقطه‌نظراتتان": [ - {ORTH: "نقطه‌نظرات", LEMMA: "نقطه‌نظرات", NORM: "نقطه‌نظرات", TAG: "NOUN"}, - {ORTH: "تان", LEMMA: "تان", NORM: "تان", TAG: "NOUN"}, - ], - "نمایشی‌مان": [ - {ORTH: "نمایشی‌", LEMMA: "نمایشی‌", NORM: "نمایشی‌", TAG: "ADJ"}, - {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"}, - ], - "نمایندگی‌شان": [ - {ORTH: "نمایندگی‌", LEMMA: "نمایندگی‌", NORM: "نمایندگی‌", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "نمونه‌اش": [ - {ORTH: "نمونه‌", LEMMA: "نمونه‌", NORM: "نمونه‌", TAG: "NOUN"}, - {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"}, - ], - "نمی‌پذیرندش": [ - {ORTH: "نمی‌پذیرند", LEMMA: "نمی‌پذیرند", NORM: "نمی‌پذیرند", TAG: "VERB"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "نوآوری‌اش": [ - {ORTH: "نوآوری‌", LEMMA: "نوآوری‌", NORM: "نوآوری‌", TAG: "NOUN"}, - {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"}, - ], - "نوشته‌هایشان": [ - {ORTH: "نوشته‌های", LEMMA: "نوشته‌های", NORM: "نوشته‌های", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "نوشته‌هایم": [ - {ORTH: "نوشته‌ها", LEMMA: "نوشته‌ها", NORM: "نوشته‌ها", TAG: "NOUN"}, - {ORTH: "یم", LEMMA: "یم", NORM: "یم", TAG: "NOUN"}, - ], - "نکردنشان": [ - {ORTH: "نکردن", LEMMA: "نکردن", NORM: "نکردن", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "نگاهداری‌شان": [ - {ORTH: "نگاهداری‌", LEMMA: "نگاهداری‌", NORM: "نگاهداری‌", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: 
"NOUN"}, - ], - "نگاهش": [ - {ORTH: "نگاه", LEMMA: "نگاه", NORM: "نگاه", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "نگرانم": [ - {ORTH: "نگران", LEMMA: "نگران", NORM: "نگران", TAG: "ADJ"}, - {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "VERB"}, - ], - "نگرشهایشان": [ - {ORTH: "نگرشهای", LEMMA: "نگرشهای", NORM: "نگرشهای", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "نیازمندند": [ - {ORTH: "نیازمند", LEMMA: "نیازمند", NORM: "نیازمند", TAG: "ADJ"}, - {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"}, - ], - "هدفش": [ - {ORTH: "هدف", LEMMA: "هدف", NORM: "هدف", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "همانست": [ - {ORTH: "همان", LEMMA: "همان", NORM: "همان", TAG: "NOUN"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "همراهش": [ - {ORTH: "همراه", LEMMA: "همراه", NORM: "همراه", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "همسرتان": [ - {ORTH: "همسر", LEMMA: "همسر", NORM: "همسر", TAG: "NOUN"}, - {ORTH: "تان", LEMMA: "تان", NORM: "تان", TAG: "NOUN"}, - ], - "همسرش": [ - {ORTH: "همسر", LEMMA: "همسر", NORM: "همسر", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "همسرم": [ - {ORTH: "همسر", LEMMA: "همسر", NORM: "همسر", TAG: "NOUN"}, - {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"}, - ], - "همفکرانش": [ - {ORTH: "همفکران", LEMMA: "همفکران", NORM: "همفکران", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "همه‌اش": [ - {ORTH: "همه‌", LEMMA: "همه‌", NORM: "همه‌", TAG: "NOUN"}, - {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"}, - ], - "همه‌شان": [ - {ORTH: "همه‌", LEMMA: "همه‌", NORM: "همه‌", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "همکارانش": [ - {ORTH: "همکاران", LEMMA: "همکاران", NORM: "همکاران", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "هم‌نظریم": [ - {ORTH: "هم‌نظر", LEMMA: "هم‌نظر", NORM: "هم‌نظر", TAG: "ADJ"}, - {ORTH: 
"یم", LEMMA: "یم", NORM: "یم", TAG: "VERB"}, - ], - "هنرش": [ - {ORTH: "هنر", LEMMA: "هنر", NORM: "هنر", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "هواست": [ - {ORTH: "هوا", LEMMA: "هوا", NORM: "هوا", TAG: "NOUN"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "هویتش": [ - {ORTH: "هویت", LEMMA: "هویت", NORM: "هویت", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "وابسته‌اند": [ - {ORTH: "وابسته‌", LEMMA: "وابسته‌", NORM: "وابسته‌", TAG: "ADJ"}, - {ORTH: "اند", LEMMA: "اند", NORM: "اند", TAG: "VERB"}, - ], - "واقفند": [ - {ORTH: "واقف", LEMMA: "واقف", NORM: "واقف", TAG: "ADJ"}, - {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"}, - ], - "والدینشان": [ - {ORTH: "والدین", LEMMA: "والدین", NORM: "والدین", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "وجدان‌تان": [ - {ORTH: "وجدان‌", LEMMA: "وجدان‌", NORM: "وجدان‌", TAG: "NOUN"}, - {ORTH: "تان", LEMMA: "تان", NORM: "تان", TAG: "NOUN"}, - ], - "وجودشان": [ - {ORTH: "وجود", LEMMA: "وجود", NORM: "وجود", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "وطنم": [ - {ORTH: "وطن", LEMMA: "وطن", NORM: "وطن", TAG: "NOUN"}, - {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"}, - ], - "وعده‌اش": [ - {ORTH: "وعده‌", LEMMA: "وعده‌", NORM: "وعده‌", TAG: "NOUN"}, - {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"}, - ], - "وقتمان": [ - {ORTH: "وقت", LEMMA: "وقت", NORM: "وقت", TAG: "NOUN"}, - {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"}, - ], - "ولادتش": [ - {ORTH: "ولادت", LEMMA: "ولادت", NORM: "ولادت", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "پایانش": [ - {ORTH: "پایان", LEMMA: "پایان", NORM: "پایان", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "پایش": [ - {ORTH: "پای", LEMMA: "پای", NORM: "پای", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "پایین‌ترند": [ - {ORTH: "پایین‌تر", LEMMA: "پایین‌تر", NORM: 
"پایین‌تر", TAG: "ADJ"}, - {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"}, - ], - "پدرت": [ - {ORTH: "پدر", LEMMA: "پدر", NORM: "پدر", TAG: "NOUN"}, - {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"}, - ], - "پدرش": [ - {ORTH: "پدر", LEMMA: "پدر", NORM: "پدر", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "پدرشان": [ - {ORTH: "پدر", LEMMA: "پدر", NORM: "پدر", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "پدرم": [ - {ORTH: "پدر", LEMMA: "پدر", NORM: "پدر", TAG: "NOUN"}, - {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"}, - ], - "پربارش": [ - {ORTH: "پربار", LEMMA: "پربار", NORM: "پربار", TAG: "ADJ"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "پروردگارت": [ - {ORTH: "پروردگار", LEMMA: "پروردگار", NORM: "پروردگار", TAG: "NOUN"}, - {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"}, - ], - "پسرتان": [ - {ORTH: "پسر", LEMMA: "پسر", NORM: "پسر", TAG: "NOUN"}, - {ORTH: "تان", LEMMA: "تان", NORM: "تان", TAG: "NOUN"}, - ], - "پسرش": [ - {ORTH: "پسر", LEMMA: "پسر", NORM: "پسر", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "پسرعمویش": [ - {ORTH: "پسرعموی", LEMMA: "پسرعموی", NORM: "پسرعموی", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "پسر‌عمویت": [ - {ORTH: "پسر‌عموی", LEMMA: "پسر‌عموی", NORM: "پسر‌عموی", TAG: "NOUN"}, - {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"}, - ], - "پشتش": [ - {ORTH: "پشت", LEMMA: "پشت", NORM: "پشت", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "پشیمونی": [ - {ORTH: "پشیمون", LEMMA: "پشیمون", NORM: "پشیمون", TAG: "ADJ"}, - {ORTH: "ی", LEMMA: "ی", NORM: "ی", TAG: "VERB"}, - ], - "پولش": [ - {ORTH: "پول", LEMMA: "پول", NORM: "پول", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "پژوهش‌هایش": [ - {ORTH: "پژوهش‌های", LEMMA: "پژوهش‌های", NORM: "پژوهش‌های", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "پیامبرش": [ - {ORTH: "پیامبر", LEMMA: "پیامبر", 
NORM: "پیامبر", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "پیامبری": [ - {ORTH: "پیامبر", LEMMA: "پیامبر", NORM: "پیامبر", TAG: "NOUN"}, - {ORTH: "ی", LEMMA: "ی", NORM: "ی", TAG: "VERB"}, - ], - "پیامش": [ - {ORTH: "پیام", LEMMA: "پیام", NORM: "پیام", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "پیداست": [ - {ORTH: "پیدا", LEMMA: "پیدا", NORM: "پیدا", TAG: "ADJ"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "پیراهنش": [ - {ORTH: "پیراهن", LEMMA: "پیراهن", NORM: "پیراهن", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "پیروانش": [ - {ORTH: "پیروان", LEMMA: "پیروان", NORM: "پیروان", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "پیشانی‌اش": [ - {ORTH: "پیشانی‌", LEMMA: "پیشانی‌", NORM: "پیشانی‌", TAG: "NOUN"}, - {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"}, - ], - "پیمانت": [ - {ORTH: "پیمان", LEMMA: "پیمان", NORM: "پیمان", TAG: "NOUN"}, - {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"}, - ], - "پیوندشان": [ - {ORTH: "پیوند", LEMMA: "پیوند", NORM: "پیوند", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "چاپش": [ - {ORTH: "چاپ", LEMMA: "چاپ", NORM: "چاپ", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "چت": [ - {ORTH: "چ", LEMMA: "چ", NORM: "چ", TAG: "ADV"}, - {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"}, - ], - "چته": [ - {ORTH: "چ", LEMMA: "چ", NORM: "چ", TAG: "ADV"}, - {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"}, - {ORTH: "ه", LEMMA: "ه", NORM: "ه", TAG: "VERB"}, - ], - "چرخ‌هایش": [ - {ORTH: "چرخ‌های", LEMMA: "چرخ‌های", NORM: "چرخ‌های", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "چشمم": [ - {ORTH: "چشم", LEMMA: "چشم", NORM: "چشم", TAG: "NOUN"}, - {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"}, - ], - "چشمهایش": [ - {ORTH: "چشمهای", LEMMA: "چشمهای", NORM: "چشمهای", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - 
"چشمهایشان": [ - {ORTH: "چشمهای", LEMMA: "چشمهای", NORM: "چشمهای", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "چمنم": [ - {ORTH: "چمن", LEMMA: "چمن", NORM: "چمن", TAG: "NOUN"}, - {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"}, - ], - "چهره‌اش": [ - {ORTH: "چهره‌", LEMMA: "چهره‌", NORM: "چهره‌", TAG: "NOUN"}, - {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"}, - ], - "چکاره‌اند": [ - {ORTH: "چکاره‌", LEMMA: "چکاره‌", NORM: "چکاره‌", TAG: "ADV"}, - {ORTH: "اند", LEMMA: "اند", NORM: "اند", TAG: "VERB"}, - ], - "چیزهاست": [ - {ORTH: "چیزها", LEMMA: "چیزها", NORM: "چیزها", TAG: "NOUN"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "چیزهایش": [ - {ORTH: "چیزهای", LEMMA: "چیزهای", NORM: "چیزهای", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "چیزیست": [ - {ORTH: "چیزی", LEMMA: "چیزی", NORM: "چیزی", TAG: "NOUN"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "چیست": [ - {ORTH: "چی", LEMMA: "چی", NORM: "چی", TAG: "ADV"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "کارش": [ - {ORTH: "کار", LEMMA: "کار", NORM: "کار", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "کارشان": [ - {ORTH: "کار", LEMMA: "کار", NORM: "کار", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "کارم": [ - {ORTH: "کار", LEMMA: "کار", NORM: "کار", TAG: "NOUN"}, - {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"}, - ], - "کارند": [ - {ORTH: "کار", LEMMA: "کار", NORM: "کار", TAG: "NOUN"}, - {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"}, - ], - "کارهایم": [ - {ORTH: "کارها", LEMMA: "کارها", NORM: "کارها", TAG: "NOUN"}, - {ORTH: "یم", LEMMA: "یم", NORM: "یم", TAG: "NOUN"}, - ], - "کافیست": [ - {ORTH: "کافی", LEMMA: "کافی", NORM: "کافی", TAG: "ADJ"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "کتابخانه‌اش": [ - {ORTH: "کتابخانه‌", LEMMA: "کتابخانه‌", NORM: "کتابخانه‌", TAG: "NOUN"}, - {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: 
"NOUN"}, - ], - "کتابش": [ - {ORTH: "کتاب", LEMMA: "کتاب", NORM: "کتاب", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "کتابهاشان": [ - {ORTH: "کتابها", LEMMA: "کتابها", NORM: "کتابها", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "کجاست": [ - {ORTH: "کجا", LEMMA: "کجا", NORM: "کجا", TAG: "ADV"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "کدورتهایشان": [ - {ORTH: "کدورتهای", LEMMA: "کدورتهای", NORM: "کدورتهای", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "کردنش": [ - {ORTH: "کردن", LEMMA: "کردن", NORM: "کردن", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "کرم‌خورده‌اش": [ - {ORTH: "کرم‌خورده‌", LEMMA: "کرم‌خورده‌", NORM: "کرم‌خورده‌", TAG: "ADJ"}, - {ORTH: "اش", LEMMA: "اش", NORM: "اش", TAG: "NOUN"}, - ], - "کشش": [ - {ORTH: "کش", LEMMA: "کش", NORM: "کش", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "کشورش": [ - {ORTH: "کشور", LEMMA: "کشور", NORM: "کشور", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "کشورشان": [ - {ORTH: "کشور", LEMMA: "کشور", NORM: "کشور", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "کشورمان": [ - {ORTH: "کشور", LEMMA: "کشور", NORM: "کشور", TAG: "NOUN"}, - {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"}, - ], - "کشورهاست": [ - {ORTH: "کشورها", LEMMA: "کشورها", NORM: "کشورها", TAG: "NOUN"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "کلیشه‌هاست": [ - {ORTH: "کلیشه‌ها", LEMMA: "کلیشه‌ها", NORM: "کلیشه‌ها", TAG: "NOUN"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "کمبودهاست": [ - {ORTH: "کمبودها", LEMMA: "کمبودها", NORM: "کمبودها", TAG: "NOUN"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "کمتره": [ - {ORTH: "کمتر", LEMMA: "کمتر", NORM: "کمتر", TAG: "ADJ"}, - {ORTH: "ه", LEMMA: "ه", NORM: "ه", TAG: "VERB"}, - ], - "کمکم": [ - {ORTH: "کمک", LEMMA: "کمک", NORM: "کمک", 
TAG: "NOUN"}, - {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"}, - ], - "کنارش": [ - {ORTH: "کنار", LEMMA: "کنار", NORM: "کنار", TAG: "ADP"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "کودکانشان": [ - {ORTH: "کودکان", LEMMA: "کودکان", NORM: "کودکان", TAG: "NOUN"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "کوچکش": [ - {ORTH: "کوچک", LEMMA: "کوچک", NORM: "کوچک", TAG: "ADJ"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "کیست": [ - {ORTH: "کی", LEMMA: "کی", NORM: "کی", TAG: "NOUN"}, - {ORTH: "ست", LEMMA: "ست", NORM: "ست", TAG: "VERB"}, - ], - "کیفش": [ - {ORTH: "کیف", LEMMA: "کیف", NORM: "کیف", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "گذشته‌اند": [ - {ORTH: "گذشته‌", LEMMA: "گذشته‌", NORM: "گذشته‌", TAG: "ADJ"}, - {ORTH: "اند", LEMMA: "اند", NORM: "اند", TAG: "VERB"}, - ], - "گرانقدرش": [ - {ORTH: "گرانقدر", LEMMA: "گرانقدر", NORM: "گرانقدر", TAG: "ADJ"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "گرانقدرشان": [ - {ORTH: "گرانقدر", LEMMA: "گرانقدر", NORM: "گرانقدر", TAG: "ADJ"}, - {ORTH: "شان", LEMMA: "شان", NORM: "شان", TAG: "NOUN"}, - ], - "گردنتان": [ - {ORTH: "گردن", LEMMA: "گردن", NORM: "گردن", TAG: "NOUN"}, - {ORTH: "تان", LEMMA: "تان", NORM: "تان", TAG: "NOUN"}, - ], - "گردنش": [ - {ORTH: "گردن", LEMMA: "گردن", NORM: "گردن", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "گرفتارند": [ - {ORTH: "گرفتار", LEMMA: "گرفتار", NORM: "گرفتار", TAG: "ADJ"}, - {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"}, - ], - "گرفتنت": [ - {ORTH: "گرفتن", LEMMA: "گرفتن", NORM: "گرفتن", TAG: "NOUN"}, - {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"}, - ], - "گروهند": [ - {ORTH: "گروه", LEMMA: "گروه", NORM: "گروه", TAG: "NOUN"}, - {ORTH: "ند", LEMMA: "ند", NORM: "ند", TAG: "VERB"}, - ], - "گروگانهایش": [ - {ORTH: "گروگانهای", LEMMA: "گروگانهای", NORM: "گروگانهای", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "گریمش": [ - {ORTH: 
"گریم", LEMMA: "گریم", NORM: "گریم", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "گفتارمان": [ - {ORTH: "گفتار", LEMMA: "گفتار", NORM: "گفتار", TAG: "NOUN"}, - {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"}, - ], - "گلهایش": [ - {ORTH: "گلهای", LEMMA: "گلهای", NORM: "گلهای", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "گلویش": [ - {ORTH: "گلوی", LEMMA: "گلوی", NORM: "گلوی", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "گناهت": [ - {ORTH: "گناه", LEMMA: "گناه", NORM: "گناه", TAG: "NOUN"}, - {ORTH: "ت", LEMMA: "ت", NORM: "ت", TAG: "NOUN"}, - ], - "گوشش": [ - {ORTH: "گوش", LEMMA: "گوش", NORM: "گوش", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "گوشم": [ - {ORTH: "گوش", LEMMA: "گوش", NORM: "گوش", TAG: "NOUN"}, - {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"}, - ], - "گولش": [ - {ORTH: "گول", LEMMA: "گول", NORM: "گول", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - "یادتان": [ - {ORTH: "یاد", LEMMA: "یاد", NORM: "یاد", TAG: "NOUN"}, - {ORTH: "تان", LEMMA: "تان", NORM: "تان", TAG: "NOUN"}, - ], - "یادم": [ - {ORTH: "یاد", LEMMA: "یاد", NORM: "یاد", TAG: "NOUN"}, - {ORTH: "م", LEMMA: "م", NORM: "م", TAG: "NOUN"}, - ], - "یادمان": [ - {ORTH: "یاد", LEMMA: "یاد", NORM: "یاد", TAG: "NOUN"}, - {ORTH: "مان", LEMMA: "مان", NORM: "مان", TAG: "NOUN"}, - ], - "یارانش": [ - {ORTH: "یاران", LEMMA: "یاران", NORM: "یاران", TAG: "NOUN"}, - {ORTH: "ش", LEMMA: "ش", NORM: "ش", TAG: "NOUN"}, - ], - } -) -TOKENIZER_EXCEPTIONS = _exc diff --git a/spacy/lang/fi/tokenizer_exceptions.py b/spacy/lang/fi/tokenizer_exceptions.py index faaf609f9..22d710cb0 100644 --- a/spacy/lang/fi/tokenizer_exceptions.py +++ b/spacy/lang/fi/tokenizer_exceptions.py @@ -1,5 +1,5 @@ from ..tokenizer_exceptions import BASE_EXCEPTIONS -from ...symbols import ORTH, LEMMA +from ...symbols import ORTH from ...util import update_exc @@ -8,74 +8,74 @@ _exc = {} # Source 
https://www.cs.tut.fi/~jkorpela/kielenopas/5.5.html for exc_data in [ - {ORTH: "aik.", LEMMA: "aikaisempi"}, - {ORTH: "alk.", LEMMA: "alkaen"}, - {ORTH: "alv.", LEMMA: "arvonlisävero"}, - {ORTH: "ark.", LEMMA: "arkisin"}, - {ORTH: "as.", LEMMA: "asunto"}, - {ORTH: "eaa.", LEMMA: "ennen ajanlaskun alkua"}, - {ORTH: "ed.", LEMMA: "edellinen"}, - {ORTH: "esim.", LEMMA: "esimerkki"}, - {ORTH: "huom.", LEMMA: "huomautus"}, - {ORTH: "jne.", LEMMA: "ja niin edelleen"}, - {ORTH: "joht.", LEMMA: "johtaja"}, - {ORTH: "k.", LEMMA: "kuollut"}, - {ORTH: "ks.", LEMMA: "katso"}, - {ORTH: "lk.", LEMMA: "luokka"}, - {ORTH: "lkm.", LEMMA: "lukumäärä"}, - {ORTH: "lyh.", LEMMA: "lyhenne"}, - {ORTH: "läh.", LEMMA: "lähettäjä"}, - {ORTH: "miel.", LEMMA: "mieluummin"}, - {ORTH: "milj.", LEMMA: "miljoona"}, - {ORTH: "Mm.", LEMMA: "muun muassa"}, - {ORTH: "mm.", LEMMA: "muun muassa"}, - {ORTH: "myöh.", LEMMA: "myöhempi"}, - {ORTH: "n.", LEMMA: "noin"}, - {ORTH: "nimim.", LEMMA: "nimimerkki"}, - {ORTH: "n:o", LEMMA: "numero"}, - {ORTH: "N:o", LEMMA: "numero"}, - {ORTH: "nro", LEMMA: "numero"}, - {ORTH: "ns.", LEMMA: "niin sanottu"}, - {ORTH: "nyk.", LEMMA: "nykyinen"}, - {ORTH: "oik.", LEMMA: "oikealla"}, - {ORTH: "os.", LEMMA: "osoite"}, - {ORTH: "p.", LEMMA: "päivä"}, - {ORTH: "par.", LEMMA: "paremmin"}, - {ORTH: "per.", LEMMA: "perustettu"}, - {ORTH: "pj.", LEMMA: "puheenjohtaja"}, - {ORTH: "puh.joht.", LEMMA: "puheenjohtaja"}, - {ORTH: "prof.", LEMMA: "professori"}, - {ORTH: "puh.", LEMMA: "puhelin"}, - {ORTH: "pvm.", LEMMA: "päivämäärä"}, - {ORTH: "rak.", LEMMA: "rakennettu"}, - {ORTH: "ry.", LEMMA: "rekisteröity yhdistys"}, - {ORTH: "s.", LEMMA: "sivu"}, - {ORTH: "siht.", LEMMA: "sihteeri"}, - {ORTH: "synt.", LEMMA: "syntynyt"}, - {ORTH: "t.", LEMMA: "toivoo"}, - {ORTH: "tark.", LEMMA: "tarkastanut"}, - {ORTH: "til.", LEMMA: "tilattu"}, - {ORTH: "tms.", LEMMA: "tai muuta sellaista"}, - {ORTH: "toim.", LEMMA: "toimittanut"}, - {ORTH: "v.", LEMMA: "vuosi"}, - {ORTH: "vas.", LEMMA: 
"vasen"}, - {ORTH: "vast.", LEMMA: "vastaus"}, - {ORTH: "vrt.", LEMMA: "vertaa"}, - {ORTH: "yht.", LEMMA: "yhteensä"}, - {ORTH: "yl.", LEMMA: "yleinen"}, - {ORTH: "ym.", LEMMA: "ynnä muuta"}, - {ORTH: "yms.", LEMMA: "ynnä muuta sellaista"}, - {ORTH: "yo.", LEMMA: "ylioppilas"}, - {ORTH: "yliopp.", LEMMA: "ylioppilas"}, - {ORTH: "ao.", LEMMA: "asianomainen"}, - {ORTH: "em.", LEMMA: "edellä mainittu"}, - {ORTH: "ko.", LEMMA: "kyseessä oleva"}, - {ORTH: "ml.", LEMMA: "mukaan luettuna"}, - {ORTH: "po.", LEMMA: "puheena oleva"}, - {ORTH: "so.", LEMMA: "se on"}, - {ORTH: "ts.", LEMMA: "toisin sanoen"}, - {ORTH: "vm.", LEMMA: "viimeksi mainittu"}, - {ORTH: "srk.", LEMMA: "seurakunta"}, + {ORTH: "aik."}, + {ORTH: "alk."}, + {ORTH: "alv."}, + {ORTH: "ark."}, + {ORTH: "as."}, + {ORTH: "eaa."}, + {ORTH: "ed."}, + {ORTH: "esim."}, + {ORTH: "huom."}, + {ORTH: "jne."}, + {ORTH: "joht."}, + {ORTH: "k."}, + {ORTH: "ks."}, + {ORTH: "lk."}, + {ORTH: "lkm."}, + {ORTH: "lyh."}, + {ORTH: "läh."}, + {ORTH: "miel."}, + {ORTH: "milj."}, + {ORTH: "Mm."}, + {ORTH: "mm."}, + {ORTH: "myöh."}, + {ORTH: "n."}, + {ORTH: "nimim."}, + {ORTH: "n:o"}, + {ORTH: "N:o"}, + {ORTH: "nro"}, + {ORTH: "ns."}, + {ORTH: "nyk."}, + {ORTH: "oik."}, + {ORTH: "os."}, + {ORTH: "p."}, + {ORTH: "par."}, + {ORTH: "per."}, + {ORTH: "pj."}, + {ORTH: "puh.joht."}, + {ORTH: "prof."}, + {ORTH: "puh."}, + {ORTH: "pvm."}, + {ORTH: "rak."}, + {ORTH: "ry."}, + {ORTH: "s."}, + {ORTH: "siht."}, + {ORTH: "synt."}, + {ORTH: "t."}, + {ORTH: "tark."}, + {ORTH: "til."}, + {ORTH: "tms."}, + {ORTH: "toim."}, + {ORTH: "v."}, + {ORTH: "vas."}, + {ORTH: "vast."}, + {ORTH: "vrt."}, + {ORTH: "yht."}, + {ORTH: "yl."}, + {ORTH: "ym."}, + {ORTH: "yms."}, + {ORTH: "yo."}, + {ORTH: "yliopp."}, + {ORTH: "ao."}, + {ORTH: "em."}, + {ORTH: "ko."}, + {ORTH: "ml."}, + {ORTH: "po."}, + {ORTH: "so."}, + {ORTH: "ts."}, + {ORTH: "vm."}, + {ORTH: "srk."}, ]: _exc[exc_data[ORTH]] = [exc_data] diff --git a/spacy/lang/fr/tokenizer_exceptions.py 
b/spacy/lang/fr/tokenizer_exceptions.py index a1ad7bcbb..6f429eecc 100644 --- a/spacy/lang/fr/tokenizer_exceptions.py +++ b/spacy/lang/fr/tokenizer_exceptions.py @@ -3,7 +3,7 @@ import re from ..tokenizer_exceptions import BASE_EXCEPTIONS from .punctuation import ELISION, HYPHENS from ..char_classes import ALPHA_LOWER, ALPHA -from ...symbols import ORTH, LEMMA +from ...symbols import ORTH from ...util import update_exc @@ -28,29 +28,29 @@ def lower_first_letter(text): return text[0].lower() + text[1:] -_exc = {"J.-C.": [{LEMMA: "Jésus", ORTH: "J."}, {LEMMA: "Christ", ORTH: "-C."}]} +_exc = {"J.-C.": [{ORTH: "J."}, {ORTH: "-C."}]} for exc_data in [ - {LEMMA: "avant", ORTH: "av."}, - {LEMMA: "janvier", ORTH: "janv."}, - {LEMMA: "février", ORTH: "févr."}, - {LEMMA: "avril", ORTH: "avr."}, - {LEMMA: "juillet", ORTH: "juill."}, - {LEMMA: "septembre", ORTH: "sept."}, - {LEMMA: "octobre", ORTH: "oct."}, - {LEMMA: "novembre", ORTH: "nov."}, - {LEMMA: "décembre", ORTH: "déc."}, - {LEMMA: "après", ORTH: "apr."}, - {LEMMA: "docteur", ORTH: "Dr."}, - {LEMMA: "monsieur", ORTH: "M."}, - {LEMMA: "monsieur", ORTH: "Mr."}, - {LEMMA: "madame", ORTH: "Mme."}, - {LEMMA: "mademoiselle", ORTH: "Mlle."}, - {LEMMA: "numéro", ORTH: "n°"}, - {LEMMA: "degrés", ORTH: "d°"}, - {LEMMA: "saint", ORTH: "St."}, - {LEMMA: "sainte", ORTH: "Ste."}, + {ORTH: "av."}, + {ORTH: "janv."}, + {ORTH: "févr."}, + {ORTH: "avr."}, + {ORTH: "juill."}, + {ORTH: "sept."}, + {ORTH: "oct."}, + {ORTH: "nov."}, + {ORTH: "déc."}, + {ORTH: "apr."}, + {ORTH: "Dr."}, + {ORTH: "M."}, + {ORTH: "Mr."}, + {ORTH: "Mme."}, + {ORTH: "Mlle."}, + {ORTH: "n°"}, + {ORTH: "d°"}, + {ORTH: "St."}, + {ORTH: "Ste."}, ]: _exc[exc_data[ORTH]] = [exc_data] @@ -80,55 +80,37 @@ for orth in [ _exc[orth] = [{ORTH: orth}] -for verb, verb_lemma in [ - ("a", "avoir"), - ("est", "être"), - ("semble", "sembler"), - ("indique", "indiquer"), - ("moque", "moquer"), - ("passe", "passer"), +for verb in [ + "a", + "est", "semble", + "indique", + "moque", +
"passe", ]: for orth in [verb, verb.title()]: for pronoun in ["elle", "il", "on"]: token = f"{orth}-t-{pronoun}" - _exc[token] = [ - {LEMMA: verb_lemma, ORTH: orth}, # , TAG: "VERB"}, - {LEMMA: "t", ORTH: "-t"}, - {LEMMA: pronoun, ORTH: "-" + pronoun}, - ] + _exc[token] = [{ORTH: orth}, {ORTH: "-t"}, {ORTH: "-" + pronoun}] -for verb, verb_lemma in [("est", "être")]: +for verb in ["est"]: for orth in [verb, verb.title()]: - token = f"{orth}-ce" - _exc[token] = [ - {LEMMA: verb_lemma, ORTH: orth}, # , TAG: "VERB"}, - {LEMMA: "ce", ORTH: "-ce"}, - ] + _exc[f"{orth}-ce"] = [{ORTH: orth}, {ORTH: "-ce"}] -for pre, pre_lemma in [("qu'", "que"), ("n'", "ne")]: +for pre in ["qu'", "n'"]: for orth in [pre, pre.title()]: - _exc[f"{orth}est-ce"] = [ - {LEMMA: pre_lemma, ORTH: orth}, - {LEMMA: "être", ORTH: "est"}, - {LEMMA: "ce", ORTH: "-ce"}, - ] + _exc[f"{orth}est-ce"] = [{ORTH: orth}, {ORTH: "est"}, {ORTH: "-ce"}] for verb, pronoun in [("est", "il"), ("EST", "IL")]: - token = "{}-{}".format(verb, pronoun) - _exc[token] = [ - {LEMMA: "être", ORTH: verb}, - {LEMMA: pronoun, ORTH: "-" + pronoun}, - ] + _exc[f"{verb}-{pronoun}"] = [{ORTH: verb}, {ORTH: "-" + pronoun}] for s, verb, pronoun in [("s", "est", "il"), ("S", "EST", "IL")]: - token = "{}'{}-{}".format(s, verb, pronoun) - _exc[token] = [ - {LEMMA: "se", ORTH: s + "'"}, - {LEMMA: "être", ORTH: verb}, - {LEMMA: pronoun, ORTH: "-" + pronoun}, + _exc[f"{s}'{verb}-{pronoun}"] = [ + {ORTH: s + "'"}, + {ORTH: verb}, + {ORTH: "-" + pronoun}, ] diff --git a/spacy/lang/ga/tokenizer_exceptions.py b/spacy/lang/ga/tokenizer_exceptions.py index fbd6fa0f5..abf49c511 100644 --- a/spacy/lang/ga/tokenizer_exceptions.py +++ b/spacy/lang/ga/tokenizer_exceptions.py @@ -1,81 +1,65 @@ from ..tokenizer_exceptions import BASE_EXCEPTIONS -from ...symbols import POS, DET, ADP, CCONJ, ADV, NOUN, X, AUX -from ...symbols import ORTH, LEMMA, NORM +from ...symbols import ORTH, NORM from ...util import update_exc _exc = { - "'acha'n": [ - {ORTH: 
"'ach", LEMMA: "gach", NORM: "gach", POS: DET}, - {ORTH: "a'n", LEMMA: "aon", NORM: "aon", POS: DET}, - ], - "dem'": [ - {ORTH: "de", LEMMA: "de", NORM: "de", POS: ADP}, - {ORTH: "m'", LEMMA: "mo", NORM: "mo", POS: DET}, - ], - "ded'": [ - {ORTH: "de", LEMMA: "de", NORM: "de", POS: ADP}, - {ORTH: "d'", LEMMA: "do", NORM: "do", POS: DET}, - ], - "lem'": [ - {ORTH: "le", LEMMA: "le", NORM: "le", POS: ADP}, - {ORTH: "m'", LEMMA: "mo", NORM: "mo", POS: DET}, - ], - "led'": [ - {ORTH: "le", LEMMA: "le", NORM: "le", POS: ADP}, - {ORTH: "d'", LEMMA: "mo", NORM: "do", POS: DET}, - ], + "'acha'n": [{ORTH: "'ach", NORM: "gach"}, {ORTH: "a'n", NORM: "aon"}], + "dem'": [{ORTH: "de", NORM: "de"}, {ORTH: "m'", NORM: "mo"}], + "ded'": [{ORTH: "de", NORM: "de"}, {ORTH: "d'", NORM: "do"}], + "lem'": [{ORTH: "le", NORM: "le"}, {ORTH: "m'", NORM: "mo"}], + "led'": [{ORTH: "le", NORM: "le"}, {ORTH: "d'", NORM: "do"}], } for exc_data in [ - {ORTH: "'gus", LEMMA: "agus", NORM: "agus", POS: CCONJ}, - {ORTH: "'ach", LEMMA: "gach", NORM: "gach", POS: DET}, - {ORTH: "ao'", LEMMA: "aon", NORM: "aon"}, - {ORTH: "'niar", LEMMA: "aniar", NORM: "aniar", POS: ADV}, - {ORTH: "'níos", LEMMA: "aníos", NORM: "aníos", POS: ADV}, - {ORTH: "'ndiu", LEMMA: "inniu", NORM: "inniu", POS: ADV}, - {ORTH: "'nocht", LEMMA: "anocht", NORM: "anocht", POS: ADV}, - {ORTH: "m'", LEMMA: "mo", POS: DET}, - {ORTH: "Aib.", LEMMA: "Aibreán", POS: NOUN}, - {ORTH: "Ath.", LEMMA: "athair", POS: NOUN}, - {ORTH: "Beal.", LEMMA: "Bealtaine", POS: NOUN}, - {ORTH: "a.C.n.", LEMMA: "ante Christum natum", POS: X}, - {ORTH: "m.sh.", LEMMA: "mar shampla", POS: ADV}, - {ORTH: "M.F.", LEMMA: "Meán Fómhair", POS: NOUN}, - {ORTH: "M.Fómh.", LEMMA: "Meán Fómhair", POS: NOUN}, - {ORTH: "D.F.", LEMMA: "Deireadh Fómhair", POS: NOUN}, - {ORTH: "D.Fómh.", LEMMA: "Deireadh Fómhair", POS: NOUN}, - {ORTH: "r.C.", LEMMA: "roimh Chríost", POS: ADV}, - {ORTH: "R.C.", LEMMA: "roimh Chríost", POS: ADV}, - {ORTH: "r.Ch.", LEMMA: "roimh Chríost", POS: 
ADV}, - {ORTH: "r.Chr.", LEMMA: "roimh Chríost", POS: ADV}, - {ORTH: "R.Ch.", LEMMA: "roimh Chríost", POS: ADV}, - {ORTH: "R.Chr.", LEMMA: "roimh Chríost", POS: ADV}, - {ORTH: "⁊rl.", LEMMA: "agus araile", POS: ADV}, - {ORTH: "srl.", LEMMA: "agus araile", POS: ADV}, - {ORTH: "Co.", LEMMA: "contae", POS: NOUN}, - {ORTH: "Ean.", LEMMA: "Eanáir", POS: NOUN}, - {ORTH: "Feab.", LEMMA: "Feabhra", POS: NOUN}, - {ORTH: "gCo.", LEMMA: "contae", POS: NOUN}, - {ORTH: ".i.", LEMMA: "eadhon", POS: ADV}, - {ORTH: "B'", LEMMA: "ba", POS: AUX}, - {ORTH: "b'", LEMMA: "ba", POS: AUX}, - {ORTH: "lch.", LEMMA: "leathanach", POS: NOUN}, - {ORTH: "Lch.", LEMMA: "leathanach", POS: NOUN}, - {ORTH: "lgh.", LEMMA: "leathanach", POS: NOUN}, - {ORTH: "Lgh.", LEMMA: "leathanach", POS: NOUN}, - {ORTH: "Lún.", LEMMA: "Lúnasa", POS: NOUN}, - {ORTH: "Már.", LEMMA: "Márta", POS: NOUN}, - {ORTH: "Meith.", LEMMA: "Meitheamh", POS: NOUN}, - {ORTH: "Noll.", LEMMA: "Nollaig", POS: NOUN}, - {ORTH: "Samh.", LEMMA: "Samhain", POS: NOUN}, - {ORTH: "tAth.", LEMMA: "athair", POS: NOUN}, - {ORTH: "tUas.", LEMMA: "Uasal", POS: NOUN}, - {ORTH: "teo.", LEMMA: "teoranta", POS: NOUN}, - {ORTH: "Teo.", LEMMA: "teoranta", POS: NOUN}, - {ORTH: "Uas.", LEMMA: "Uasal", POS: NOUN}, - {ORTH: "uimh.", LEMMA: "uimhir", POS: NOUN}, - {ORTH: "Uimh.", LEMMA: "uimhir", POS: NOUN}, + {ORTH: "'gus", NORM: "agus"}, + {ORTH: "'ach", NORM: "gach"}, + {ORTH: "ao'", NORM: "aon"}, + {ORTH: "'niar", NORM: "aniar"}, + {ORTH: "'níos", NORM: "aníos"}, + {ORTH: "'ndiu", NORM: "inniu"}, + {ORTH: "'nocht", NORM: "anocht"}, + {ORTH: "m'"}, + {ORTH: "Aib."}, + {ORTH: "Ath."}, + {ORTH: "Beal."}, + {ORTH: "a.C.n."}, + {ORTH: "m.sh."}, + {ORTH: "M.F."}, + {ORTH: "M.Fómh."}, + {ORTH: "D.F."}, + {ORTH: "D.Fómh."}, + {ORTH: "r.C."}, + {ORTH: "R.C."}, + {ORTH: "r.Ch."}, + {ORTH: "r.Chr."}, + {ORTH: "R.Ch."}, + {ORTH: "R.Chr."}, + {ORTH: "⁊rl."}, + {ORTH: "srl."}, + {ORTH: "Co."}, + {ORTH: "Ean."}, + {ORTH: "Feab."}, + {ORTH: "gCo."}, + {ORTH: ".i."}, 
+ {ORTH: "B'"}, + {ORTH: "b'"}, + {ORTH: "lch."}, + {ORTH: "Lch."}, + {ORTH: "lgh."}, + {ORTH: "Lgh."}, + {ORTH: "Lún."}, + {ORTH: "Már."}, + {ORTH: "Meith."}, + {ORTH: "Noll."}, + {ORTH: "Samh."}, + {ORTH: "tAth."}, + {ORTH: "tUas."}, + {ORTH: "teo."}, + {ORTH: "Teo."}, + {ORTH: "Uas."}, + {ORTH: "uimh."}, + {ORTH: "Uimh."}, ]: _exc[exc_data[ORTH]] = [exc_data] diff --git a/spacy/lang/id/tokenizer_exceptions.py b/spacy/lang/id/tokenizer_exceptions.py index 50ccfa33a..ff77ede9f 100644 --- a/spacy/lang/id/tokenizer_exceptions.py +++ b/spacy/lang/id/tokenizer_exceptions.py @@ -1,6 +1,6 @@ from ..tokenizer_exceptions import BASE_EXCEPTIONS from ._tokenizer_exceptions_list import ID_BASE_EXCEPTIONS -from ...symbols import ORTH, LEMMA, NORM +from ...symbols import ORTH, NORM from ...util import update_exc @@ -11,53 +11,47 @@ _exc = {} for orth in ID_BASE_EXCEPTIONS: _exc[orth] = [{ORTH: orth}] - orth_title = orth.title() _exc[orth_title] = [{ORTH: orth_title}] - orth_caps = orth.upper() _exc[orth_caps] = [{ORTH: orth_caps}] - orth_lower = orth.lower() _exc[orth_lower] = [{ORTH: orth_lower}] - orth_first_upper = orth[0].upper() + orth[1:] _exc[orth_first_upper] = [{ORTH: orth_first_upper}] - if "-" in orth: orth_title = "-".join([part.title() for part in orth.split("-")]) _exc[orth_title] = [{ORTH: orth_title}] - orth_caps = "-".join([part.upper() for part in orth.split("-")]) _exc[orth_caps] = [{ORTH: orth_caps}] for exc_data in [ - {ORTH: "Jan.", LEMMA: "Januari", NORM: "Januari"}, - {ORTH: "Feb.", LEMMA: "Februari", NORM: "Februari"}, - {ORTH: "Mar.", LEMMA: "Maret", NORM: "Maret"}, - {ORTH: "Apr.", LEMMA: "April", NORM: "April"}, - {ORTH: "Jun.", LEMMA: "Juni", NORM: "Juni"}, - {ORTH: "Jul.", LEMMA: "Juli", NORM: "Juli"}, - {ORTH: "Agu.", LEMMA: "Agustus", NORM: "Agustus"}, - {ORTH: "Ags.", LEMMA: "Agustus", NORM: "Agustus"}, - {ORTH: "Sep.", LEMMA: "September", NORM: "September"}, - {ORTH: "Okt.", LEMMA: "Oktober", NORM: "Oktober"}, - {ORTH: "Nov.", LEMMA: 
"November", NORM: "November"}, - {ORTH: "Des.", LEMMA: "Desember", NORM: "Desember"}, + {ORTH: "Jan.", NORM: "Januari"}, + {ORTH: "Feb.", NORM: "Februari"}, + {ORTH: "Mar.", NORM: "Maret"}, + {ORTH: "Apr.", NORM: "April"}, + {ORTH: "Jun.", NORM: "Juni"}, + {ORTH: "Jul.", NORM: "Juli"}, + {ORTH: "Agu.", NORM: "Agustus"}, + {ORTH: "Ags.", NORM: "Agustus"}, + {ORTH: "Sep.", NORM: "September"}, + {ORTH: "Okt.", NORM: "Oktober"}, + {ORTH: "Nov.", NORM: "November"}, + {ORTH: "Des.", NORM: "Desember"}, ]: _exc[exc_data[ORTH]] = [exc_data] _other_exc = { - "do'a": [{ORTH: "do'a", LEMMA: "doa", NORM: "doa"}], - "jum'at": [{ORTH: "jum'at", LEMMA: "Jumat", NORM: "Jumat"}], - "Jum'at": [{ORTH: "Jum'at", LEMMA: "Jumat", NORM: "Jumat"}], - "la'nat": [{ORTH: "la'nat", LEMMA: "laknat", NORM: "laknat"}], - "ma'af": [{ORTH: "ma'af", LEMMA: "maaf", NORM: "maaf"}], - "mu'jizat": [{ORTH: "mu'jizat", LEMMA: "mukjizat", NORM: "mukjizat"}], - "Mu'jizat": [{ORTH: "Mu'jizat", LEMMA: "mukjizat", NORM: "mukjizat"}], - "ni'mat": [{ORTH: "ni'mat", LEMMA: "nikmat", NORM: "nikmat"}], - "raka'at": [{ORTH: "raka'at", LEMMA: "rakaat", NORM: "rakaat"}], - "ta'at": [{ORTH: "ta'at", LEMMA: "taat", NORM: "taat"}], + "do'a": [{ORTH: "do'a", NORM: "doa"}], + "jum'at": [{ORTH: "jum'at", NORM: "Jumat"}], + "Jum'at": [{ORTH: "Jum'at", NORM: "Jumat"}], + "la'nat": [{ORTH: "la'nat", NORM: "laknat"}], + "ma'af": [{ORTH: "ma'af", NORM: "maaf"}], + "mu'jizat": [{ORTH: "mu'jizat", NORM: "mukjizat"}], + "Mu'jizat": [{ORTH: "Mu'jizat", NORM: "mukjizat"}], + "ni'mat": [{ORTH: "ni'mat", NORM: "nikmat"}], + "raka'at": [{ORTH: "raka'at", NORM: "rakaat"}], + "ta'at": [{ORTH: "ta'at", NORM: "taat"}], } _exc.update(_other_exc) diff --git a/spacy/lang/it/tokenizer_exceptions.py b/spacy/lang/it/tokenizer_exceptions.py index c9c729d63..0c9968bc6 100644 --- a/spacy/lang/it/tokenizer_exceptions.py +++ b/spacy/lang/it/tokenizer_exceptions.py @@ -1,5 +1,5 @@ from ..tokenizer_exceptions import BASE_EXCEPTIONS -from ...symbols 
import ORTH, LEMMA +from ...symbols import ORTH from ...util import update_exc @@ -10,7 +10,7 @@ _exc = { "L'art.": [{ORTH: "L'"}, {ORTH: "art."}], "l'art.": [{ORTH: "l'"}, {ORTH: "art."}], "nell'art.": [{ORTH: "nell'"}, {ORTH: "art."}], - "po'": [{ORTH: "po'", LEMMA: "poco"}], + "po'": [{ORTH: "po'"}], "sett..": [{ORTH: "sett."}, {ORTH: "."}], } diff --git a/spacy/lang/lb/tokenizer_exceptions.py b/spacy/lang/lb/tokenizer_exceptions.py index f6cdc7f34..d00dc9610 100644 --- a/spacy/lang/lb/tokenizer_exceptions.py +++ b/spacy/lang/lb/tokenizer_exceptions.py @@ -1,5 +1,5 @@ from ..tokenizer_exceptions import BASE_EXCEPTIONS -from ...symbols import ORTH, LEMMA, NORM +from ...symbols import ORTH, NORM from ...util import update_exc @@ -10,19 +10,19 @@ _exc = {} # translate / delete what is not necessary for exc_data in [ - {ORTH: "’t", LEMMA: "et", NORM: "et"}, - {ORTH: "’T", LEMMA: "et", NORM: "et"}, - {ORTH: "'t", LEMMA: "et", NORM: "et"}, - {ORTH: "'T", LEMMA: "et", NORM: "et"}, - {ORTH: "wgl.", LEMMA: "wannechgelift", NORM: "wannechgelift"}, - {ORTH: "M.", LEMMA: "Monsieur", NORM: "Monsieur"}, - {ORTH: "Mme.", LEMMA: "Madame", NORM: "Madame"}, - {ORTH: "Dr.", LEMMA: "Dokter", NORM: "Dokter"}, - {ORTH: "Tel.", LEMMA: "Telefon", NORM: "Telefon"}, - {ORTH: "asw.", LEMMA: "an sou weider", NORM: "an sou weider"}, - {ORTH: "etc.", LEMMA: "et cetera", NORM: "et cetera"}, - {ORTH: "bzw.", LEMMA: "bezéiungsweis", NORM: "bezéiungsweis"}, - {ORTH: "Jan.", LEMMA: "Januar", NORM: "Januar"}, + {ORTH: "’t", NORM: "et"}, + {ORTH: "’T", NORM: "et"}, + {ORTH: "'t", NORM: "et"}, + {ORTH: "'T", NORM: "et"}, + {ORTH: "wgl.", NORM: "wannechgelift"}, + {ORTH: "M.", NORM: "Monsieur"}, + {ORTH: "Mme.", NORM: "Madame"}, + {ORTH: "Dr.", NORM: "Dokter"}, + {ORTH: "Tel.", NORM: "Telefon"}, + {ORTH: "asw.", NORM: "an sou weider"}, + {ORTH: "etc.", NORM: "et cetera"}, + {ORTH: "bzw.", NORM: "bezéiungsweis"}, + {ORTH: "Jan.", NORM: "Januar"}, ]: _exc[exc_data[ORTH]] = [exc_data] diff --git 
a/spacy/lang/lij/tokenizer_exceptions.py b/spacy/lang/lij/tokenizer_exceptions.py index 61fa0df52..52eae2c89 100644 --- a/spacy/lang/lij/tokenizer_exceptions.py +++ b/spacy/lang/lij/tokenizer_exceptions.py @@ -1,53 +1,50 @@ from ..tokenizer_exceptions import BASE_EXCEPTIONS -from ...symbols import ORTH, LEMMA +from ...symbols import ORTH from ...util import update_exc _exc = {} -for raw, lemma in [ - ("a-a", "a-o"), - ("a-e", "a-o"), - ("a-o", "a-o"), - ("a-i", "a-o"), - ("co-a", "co-o"), - ("co-e", "co-o"), - ("co-i", "co-o"), - ("co-o", "co-o"), - ("da-a", "da-o"), - ("da-e", "da-o"), - ("da-i", "da-o"), - ("da-o", "da-o"), - ("pe-a", "pe-o"), - ("pe-e", "pe-o"), - ("pe-i", "pe-o"), - ("pe-o", "pe-o"), +for raw in [ + "a-e", + "a-o", + "a-i", + "a-a", + "co-a", + "co-e", + "co-i", + "co-o", + "da-a", + "da-e", + "da-i", + "da-o", + "pe-a", + "pe-e", + "pe-i", + "pe-o", ]: for orth in [raw, raw.capitalize()]: - _exc[orth] = [{ORTH: orth, LEMMA: lemma}] + _exc[orth] = [{ORTH: orth}] # Prefix + prepositions with à (e.g. 
"sott'a-o") -for prep, prep_lemma in [ - ("a-a", "a-o"), - ("a-e", "a-o"), - ("a-o", "a-o"), - ("a-i", "a-o"), +for prep in [ + "a-a", + "a-e", + "a-o", + "a-i", ]: - for prefix, prefix_lemma in [ - ("sott'", "sotta"), - ("sott’", "sotta"), - ("contr'", "contra"), - ("contr’", "contra"), - ("ch'", "che"), - ("ch’", "che"), - ("s'", "se"), - ("s’", "se"), + for prefix in [ + "sott'", + "sott’", + "contr'", + "contr’", + "ch'", + "ch’", + "s'", + "s’", ]: for prefix_orth in [prefix, prefix.capitalize()]: - _exc[prefix_orth + prep] = [ - {ORTH: prefix_orth, LEMMA: prefix_lemma}, - {ORTH: prep, LEMMA: prep_lemma}, - ] + _exc[prefix_orth + prep] = [{ORTH: prefix_orth}, {ORTH: prep}] TOKENIZER_EXCEPTIONS = update_exc(BASE_EXCEPTIONS, _exc) diff --git a/spacy/lang/nb/tokenizer_exceptions.py b/spacy/lang/nb/tokenizer_exceptions.py index 9a604cedc..0be436ae4 100644 --- a/spacy/lang/nb/tokenizer_exceptions.py +++ b/spacy/lang/nb/tokenizer_exceptions.py @@ -1,5 +1,5 @@ from ..tokenizer_exceptions import BASE_EXCEPTIONS -from ...symbols import ORTH, LEMMA +from ...symbols import ORTH, NORM from ...util import update_exc @@ -7,17 +7,17 @@ _exc = {} for exc_data in [ - {ORTH: "jan.", LEMMA: "januar"}, - {ORTH: "feb.", LEMMA: "februar"}, - {ORTH: "mar.", LEMMA: "mars"}, - {ORTH: "apr.", LEMMA: "april"}, - {ORTH: "jun.", LEMMA: "juni"}, - {ORTH: "jul.", LEMMA: "juli"}, - {ORTH: "aug.", LEMMA: "august"}, - {ORTH: "sep.", LEMMA: "september"}, - {ORTH: "okt.", LEMMA: "oktober"}, - {ORTH: "nov.", LEMMA: "november"}, - {ORTH: "des.", LEMMA: "desember"}, + {ORTH: "jan.", NORM: "januar"}, + {ORTH: "feb.", NORM: "februar"}, + {ORTH: "mar.", NORM: "mars"}, + {ORTH: "apr.", NORM: "april"}, + {ORTH: "jun.", NORM: "juni"}, + {ORTH: "jul.", NORM: "juli"}, + {ORTH: "aug.", NORM: "august"}, + {ORTH: "sep.", NORM: "september"}, + {ORTH: "okt.", NORM: "oktober"}, + {ORTH: "nov.", NORM: "november"}, + {ORTH: "des.", NORM: "desember"}, ]: _exc[exc_data[ORTH]] = [exc_data] diff --git 
a/spacy/lang/ru/tokenizer_exceptions.py b/spacy/lang/ru/tokenizer_exceptions.py index e4fbd2d75..1dc363fae 100644 --- a/spacy/lang/ru/tokenizer_exceptions.py +++ b/spacy/lang/ru/tokenizer_exceptions.py @@ -1,5 +1,5 @@ from ..tokenizer_exceptions import BASE_EXCEPTIONS -from ...symbols import ORTH, LEMMA, NORM +from ...symbols import ORTH, NORM from ...util import update_exc @@ -7,58 +7,56 @@ _exc = {} _abbrev_exc = [ # Weekdays abbreviations - {ORTH: "пн", LEMMA: "понедельник", NORM: "понедельник"}, - {ORTH: "вт", LEMMA: "вторник", NORM: "вторник"}, - {ORTH: "ср", LEMMA: "среда", NORM: "среда"}, - {ORTH: "чт", LEMMA: "четверг", NORM: "четверг"}, - {ORTH: "чтв", LEMMA: "четверг", NORM: "четверг"}, - {ORTH: "пт", LEMMA: "пятница", NORM: "пятница"}, - {ORTH: "сб", LEMMA: "суббота", NORM: "суббота"}, - {ORTH: "сбт", LEMMA: "суббота", NORM: "суббота"}, - {ORTH: "вс", LEMMA: "воскресенье", NORM: "воскресенье"}, - {ORTH: "вскр", LEMMA: "воскресенье", NORM: "воскресенье"}, - {ORTH: "воскр", LEMMA: "воскресенье", NORM: "воскресенье"}, + {ORTH: "пн", NORM: "понедельник"}, + {ORTH: "вт", NORM: "вторник"}, + {ORTH: "ср", NORM: "среда"}, + {ORTH: "чт", NORM: "четверг"}, + {ORTH: "чтв", NORM: "четверг"}, + {ORTH: "пт", NORM: "пятница"}, + {ORTH: "сб", NORM: "суббота"}, + {ORTH: "сбт", NORM: "суббота"}, + {ORTH: "вс", NORM: "воскресенье"}, + {ORTH: "вскр", NORM: "воскресенье"}, + {ORTH: "воскр", NORM: "воскресенье"}, # Months abbreviations - {ORTH: "янв", LEMMA: "январь", NORM: "январь"}, - {ORTH: "фев", LEMMA: "февраль", NORM: "февраль"}, - {ORTH: "февр", LEMMA: "февраль", NORM: "февраль"}, - {ORTH: "мар", LEMMA: "март", NORM: "март"}, - # {ORTH: "март", LEMMA: "март", NORM: "март"}, - {ORTH: "мрт", LEMMA: "март", NORM: "март"}, - {ORTH: "апр", LEMMA: "апрель", NORM: "апрель"}, - # {ORTH: "май", LEMMA: "май", NORM: "май"}, - {ORTH: "июн", LEMMA: "июнь", NORM: "июнь"}, - # {ORTH: "июнь", LEMMA: "июнь", NORM: "июнь"}, - {ORTH: "июл", LEMMA: "июль", NORM: "июль"}, - # {ORTH: 
"июль", LEMMA: "июль", NORM: "июль"}, - {ORTH: "авг", LEMMA: "август", NORM: "август"}, - {ORTH: "сен", LEMMA: "сентябрь", NORM: "сентябрь"}, - {ORTH: "сент", LEMMA: "сентябрь", NORM: "сентябрь"}, - {ORTH: "окт", LEMMA: "октябрь", NORM: "октябрь"}, - {ORTH: "октб", LEMMA: "октябрь", NORM: "октябрь"}, - {ORTH: "ноя", LEMMA: "ноябрь", NORM: "ноябрь"}, - {ORTH: "нояб", LEMMA: "ноябрь", NORM: "ноябрь"}, - {ORTH: "нбр", LEMMA: "ноябрь", NORM: "ноябрь"}, - {ORTH: "дек", LEMMA: "декабрь", NORM: "декабрь"}, + {ORTH: "янв", NORM: "январь"}, + {ORTH: "фев", NORM: "февраль"}, + {ORTH: "февр", NORM: "февраль"}, + {ORTH: "мар", NORM: "март"}, + # {ORTH: "март", NORM: "март"}, + {ORTH: "мрт", NORM: "март"}, + {ORTH: "апр", NORM: "апрель"}, + # {ORTH: "май", NORM: "май"}, + {ORTH: "июн", NORM: "июнь"}, + # {ORTH: "июнь", NORM: "июнь"}, + {ORTH: "июл", NORM: "июль"}, + # {ORTH: "июль", NORM: "июль"}, + {ORTH: "авг", NORM: "август"}, + {ORTH: "сен", NORM: "сентябрь"}, + {ORTH: "сент", NORM: "сентябрь"}, + {ORTH: "окт", NORM: "октябрь"}, + {ORTH: "октб", NORM: "октябрь"}, + {ORTH: "ноя", NORM: "ноябрь"}, + {ORTH: "нояб", NORM: "ноябрь"}, + {ORTH: "нбр", NORM: "ноябрь"}, + {ORTH: "дек", NORM: "декабрь"}, ] for abbrev_desc in _abbrev_exc: abbrev = abbrev_desc[ORTH] for orth in (abbrev, abbrev.capitalize(), abbrev.upper()): - _exc[orth] = [{ORTH: orth, LEMMA: abbrev_desc[LEMMA], NORM: abbrev_desc[NORM]}] - _exc[orth + "."] = [ - {ORTH: orth + ".", LEMMA: abbrev_desc[LEMMA], NORM: abbrev_desc[NORM]} - ] + _exc[orth] = [{ORTH: orth, NORM: abbrev_desc[NORM]}] + _exc[orth + "."] = [{ORTH: orth + ".", NORM: abbrev_desc[NORM]}] _slang_exc = [ - {ORTH: "2к15", LEMMA: "2015", NORM: "2015"}, - {ORTH: "2к16", LEMMA: "2016", NORM: "2016"}, - {ORTH: "2к17", LEMMA: "2017", NORM: "2017"}, - {ORTH: "2к18", LEMMA: "2018", NORM: "2018"}, - {ORTH: "2к19", LEMMA: "2019", NORM: "2019"}, - {ORTH: "2к20", LEMMA: "2020", NORM: "2020"}, + {ORTH: "2к15", NORM: "2015"}, + {ORTH: "2к16", NORM: "2016"}, + {ORTH: 
"2к17", NORM: "2017"}, + {ORTH: "2к18", NORM: "2018"}, + {ORTH: "2к19", NORM: "2019"}, + {ORTH: "2к20", NORM: "2020"}, ] for slang_desc in _slang_exc: diff --git a/spacy/lang/sr/tokenizer_exceptions.py b/spacy/lang/sr/tokenizer_exceptions.py index a41fe7e4e..dcaa3e239 100755 --- a/spacy/lang/sr/tokenizer_exceptions.py +++ b/spacy/lang/sr/tokenizer_exceptions.py @@ -1,5 +1,5 @@ from ..tokenizer_exceptions import BASE_EXCEPTIONS -from ...symbols import ORTH, LEMMA, NORM +from ...symbols import ORTH, NORM from ...util import update_exc @@ -7,85 +7,83 @@ _exc = {} _abbrev_exc = [ # Weekdays abbreviations - {ORTH: "пoн", LEMMA: "понедељак", NORM: "понедељак"}, - {ORTH: "уто", LEMMA: "уторак", NORM: "уторак"}, - {ORTH: "сре", LEMMA: "среда", NORM: "среда"}, - {ORTH: "чет", LEMMA: "четвртак", NORM: "четвртак"}, - {ORTH: "пет", LEMMA: "петак", NORM: "петак"}, - {ORTH: "суб", LEMMA: "субота", NORM: "субота"}, - {ORTH: "нед", LEMMA: "недеља", NORM: "недеља"}, + {ORTH: "пoн", NORM: "понедељак"}, + {ORTH: "уто", NORM: "уторак"}, + {ORTH: "сре", NORM: "среда"}, + {ORTH: "чет", NORM: "четвртак"}, + {ORTH: "пет", NORM: "петак"}, + {ORTH: "суб", NORM: "субота"}, + {ORTH: "нед", NORM: "недеља"}, # Months abbreviations - {ORTH: "јан", LEMMA: "јануар", NORM: "јануар"}, - {ORTH: "феб", LEMMA: "фебруар", NORM: "фебруар"}, - {ORTH: "мар", LEMMA: "март", NORM: "март"}, - {ORTH: "апр", LEMMA: "април", NORM: "април"}, - {ORTH: "јуни", LEMMA: "јун", NORM: "јун"}, - {ORTH: "јули", LEMMA: "јул", NORM: "јул"}, - {ORTH: "авг", LEMMA: "август", NORM: "август"}, - {ORTH: "сеп", LEMMA: "септембар", NORM: "септембар"}, - {ORTH: "септ", LEMMA: "септембар", NORM: "септембар"}, - {ORTH: "окт", LEMMA: "октобар", NORM: "октобар"}, - {ORTH: "нов", LEMMA: "новембар", NORM: "новембар"}, - {ORTH: "дец", LEMMA: "децембар", NORM: "децембар"}, + {ORTH: "јан", NORM: "јануар"}, + {ORTH: "феб", NORM: "фебруар"}, + {ORTH: "мар", NORM: "март"}, + {ORTH: "апр", NORM: "април"}, + {ORTH: "јуни", NORM: "јун"}, + {ORTH: 
"јули", NORM: "јул"}, + {ORTH: "авг", NORM: "август"}, + {ORTH: "сеп", NORM: "септембар"}, + {ORTH: "септ", NORM: "септембар"}, + {ORTH: "окт", NORM: "октобар"}, + {ORTH: "нов", NORM: "новембар"}, + {ORTH: "дец", NORM: "децембар"}, ] for abbrev_desc in _abbrev_exc: abbrev = abbrev_desc[ORTH] for orth in (abbrev, abbrev.capitalize(), abbrev.upper()): - _exc[orth] = [{ORTH: orth, LEMMA: abbrev_desc[LEMMA], NORM: abbrev_desc[NORM]}] - _exc[orth + "."] = [ - {ORTH: orth + ".", LEMMA: abbrev_desc[LEMMA], NORM: abbrev_desc[NORM]} - ] + _exc[orth] = [{ORTH: orth, NORM: abbrev_desc[NORM]}] + _exc[orth + "."] = [{ORTH: orth + ".", NORM: abbrev_desc[NORM]}] # common abbreviations _slang_exc = [ # without dot - {ORTH: "др", LEMMA: "доктор", NORM: "доктор"}, - {ORTH: "гдин", LEMMA: "господин", NORM: "господин"}, - {ORTH: "гђа", LEMMA: "госпођа", NORM: "госпођа"}, - {ORTH: "гђица", LEMMA: "госпођица", NORM: "госпођица"}, - {ORTH: "мр", LEMMA: "магистар", NORM: "магистар"}, - {ORTH: "Бгд", LEMMA: "Београд", NORM: "београд"}, - {ORTH: "цм", LEMMA: "центиметар", NORM: "центиметар"}, - {ORTH: "м", LEMMA: "метар", NORM: "метар"}, - {ORTH: "км", LEMMA: "километар", NORM: "километар"}, - {ORTH: "мг", LEMMA: "милиграм", NORM: "милиграм"}, - {ORTH: "кг", LEMMA: "килограм", NORM: "килограм"}, - {ORTH: "дл", LEMMA: "децилитар", NORM: "децилитар"}, - {ORTH: "хл", LEMMA: "хектолитар", NORM: "хектолитар"}, + {ORTH: "др", NORM: "доктор"}, + {ORTH: "гдин", NORM: "господин"}, + {ORTH: "гђа", NORM: "госпођа"}, + {ORTH: "гђица", NORM: "госпођица"}, + {ORTH: "мр", NORM: "магистар"}, + {ORTH: "Бгд", NORM: "београд"}, + {ORTH: "цм", NORM: "центиметар"}, + {ORTH: "м", NORM: "метар"}, + {ORTH: "км", NORM: "километар"}, + {ORTH: "мг", NORM: "милиграм"}, + {ORTH: "кг", NORM: "килограм"}, + {ORTH: "дл", NORM: "децилитар"}, + {ORTH: "хл", NORM: "хектолитар"}, # with dot - {ORTH: "ул.", LEMMA: "улица", NORM: "улица"}, - {ORTH: "бр.", LEMMA: "број", NORM: "број"}, - {ORTH: "нпр.", LEMMA: "на пример", NORM: 
"на пример"}, - {ORTH: "тзв.", LEMMA: "такозван", NORM: "такозван"}, - {ORTH: "проф.", LEMMA: "професор", NORM: "професор"}, - {ORTH: "стр.", LEMMA: "страна", NORM: "страна"}, - {ORTH: "једн.", LEMMA: "једнина", NORM: "једнина"}, - {ORTH: "мн.", LEMMA: "множина", NORM: "множина"}, - {ORTH: "уч.", LEMMA: "ученик", NORM: "ученик"}, - {ORTH: "разр.", LEMMA: "разред", NORM: "разред"}, - {ORTH: "инж.", LEMMA: "инжењер", NORM: "инжењер"}, - {ORTH: "гимн.", LEMMA: "гимназија", NORM: "гимназија"}, - {ORTH: "год.", LEMMA: "година", NORM: "година"}, - {ORTH: "мед.", LEMMA: "медицина", NORM: "медицина"}, - {ORTH: "гимн.", LEMMA: "гимназија", NORM: "гимназија"}, - {ORTH: "акад.", LEMMA: "академик", NORM: "академик"}, - {ORTH: "доц.", LEMMA: "доцент", NORM: "доцент"}, - {ORTH: "итд.", LEMMA: "и тако даље", NORM: "и тако даље"}, - {ORTH: "и сл.", LEMMA: "и слично", NORM: "и слично"}, - {ORTH: "н.е.", LEMMA: "нова ера", NORM: "нове ере"}, - {ORTH: "о.г.", LEMMA: "ова година", NORM: "ове године"}, - {ORTH: "л.к.", LEMMA: "лична карта", NORM: "лична карта"}, - {ORTH: "в.д.", LEMMA: "вршилац дужности", NORM: "вршилац дужности"}, - {ORTH: "стр.", LEMMA: "страна", NORM: "страна"}, + {ORTH: "ул.", NORM: "улица"}, + {ORTH: "бр.", NORM: "број"}, + {ORTH: "нпр.", NORM: "на пример"}, + {ORTH: "тзв.", NORM: "такозван"}, + {ORTH: "проф.", NORM: "професор"}, + {ORTH: "стр.", NORM: "страна"}, + {ORTH: "једн.", NORM: "једнина"}, + {ORTH: "мн.", NORM: "множина"}, + {ORTH: "уч.", NORM: "ученик"}, + {ORTH: "разр.", NORM: "разред"}, + {ORTH: "инж.", NORM: "инжењер"}, + {ORTH: "гимн.", NORM: "гимназија"}, + {ORTH: "год.", NORM: "година"}, + {ORTH: "мед.", NORM: "медицина"}, + {ORTH: "гимн.", NORM: "гимназија"}, + {ORTH: "акад.", NORM: "академик"}, + {ORTH: "доц.", NORM: "доцент"}, + {ORTH: "итд.", NORM: "и тако даље"}, + {ORTH: "и сл.", NORM: "и слично"}, + {ORTH: "н.е.", NORM: "нове ере"}, + {ORTH: "о.г.", NORM: "ове године"}, + {ORTH: "л.к.", NORM: "лична карта"}, + {ORTH: "в.д.", NORM: "вршилац 
дужности"}, + {ORTH: "стр.", NORM: "страна"}, # with qoute - {ORTH: "ал'", LEMMA: "али", NORM: "али"}, - {ORTH: "ил'", LEMMA: "или", NORM: "или"}, - {ORTH: "је л'", LEMMA: "је ли", NORM: "је ли"}, - {ORTH: "да л'", LEMMA: "да ли", NORM: "да ли"}, - {ORTH: "држ'те", LEMMA: "држати", NORM: "држите"}, + {ORTH: "ал'", NORM: "али"}, + {ORTH: "ил'", NORM: "или"}, + {ORTH: "је л'", NORM: "је ли"}, + {ORTH: "да л'", NORM: "да ли"}, + {ORTH: "држ'те", NORM: "држите"}, ] for slang_desc in _slang_exc: diff --git a/spacy/lang/sv/tokenizer_exceptions.py b/spacy/lang/sv/tokenizer_exceptions.py index f1b914bff..64206f2f2 100644 --- a/spacy/lang/sv/tokenizer_exceptions.py +++ b/spacy/lang/sv/tokenizer_exceptions.py @@ -1,5 +1,5 @@ from ..tokenizer_exceptions import BASE_EXCEPTIONS -from ...symbols import LEMMA, NORM, ORTH, PRON_LEMMA +from ...symbols import NORM, ORTH from ...util import update_exc _exc = {} @@ -10,61 +10,58 @@ _exc = {} for verb_data in [ {ORTH: "driver"}, {ORTH: "kör"}, - {ORTH: "hörr", LEMMA: "hör"}, + {ORTH: "hörr"}, {ORTH: "fattar"}, - {ORTH: "hajar", LEMMA: "förstår"}, + {ORTH: "hajar"}, {ORTH: "lever"}, - {ORTH: "serr", LEMMA: "ser"}, + {ORTH: "serr"}, {ORTH: "fixar"}, ]: verb_data_tc = dict(verb_data) verb_data_tc[ORTH] = verb_data_tc[ORTH].title() for data in [verb_data, verb_data_tc]: - _exc[data[ORTH] + "u"] = [ - dict(data), - {ORTH: "u", LEMMA: PRON_LEMMA, NORM: "du"}, - ] + _exc[data[ORTH] + "u"] = [data, {ORTH: "u", NORM: "du"}] # Abbreviations for weekdays "sön." (for "söndag" / "söner") # are left out because they are ambiguous. The same is the case # for abbreviations "jul." and "Jul." ("juli" / "jul"). 
for exc_data in [ - {ORTH: "jan.", LEMMA: "januari"}, - {ORTH: "febr.", LEMMA: "februari"}, - {ORTH: "feb.", LEMMA: "februari"}, - {ORTH: "apr.", LEMMA: "april"}, - {ORTH: "jun.", LEMMA: "juni"}, - {ORTH: "aug.", LEMMA: "augusti"}, - {ORTH: "sept.", LEMMA: "september"}, - {ORTH: "sep.", LEMMA: "september"}, - {ORTH: "okt.", LEMMA: "oktober"}, - {ORTH: "nov.", LEMMA: "november"}, - {ORTH: "dec.", LEMMA: "december"}, - {ORTH: "mån.", LEMMA: "måndag"}, - {ORTH: "tis.", LEMMA: "tisdag"}, - {ORTH: "ons.", LEMMA: "onsdag"}, - {ORTH: "tors.", LEMMA: "torsdag"}, - {ORTH: "fre.", LEMMA: "fredag"}, - {ORTH: "lör.", LEMMA: "lördag"}, - {ORTH: "Jan.", LEMMA: "Januari"}, - {ORTH: "Febr.", LEMMA: "Februari"}, - {ORTH: "Feb.", LEMMA: "Februari"}, - {ORTH: "Apr.", LEMMA: "April"}, - {ORTH: "Jun.", LEMMA: "Juni"}, - {ORTH: "Aug.", LEMMA: "Augusti"}, - {ORTH: "Sept.", LEMMA: "September"}, - {ORTH: "Sep.", LEMMA: "September"}, - {ORTH: "Okt.", LEMMA: "Oktober"}, - {ORTH: "Nov.", LEMMA: "November"}, - {ORTH: "Dec.", LEMMA: "December"}, - {ORTH: "Mån.", LEMMA: "Måndag"}, - {ORTH: "Tis.", LEMMA: "Tisdag"}, - {ORTH: "Ons.", LEMMA: "Onsdag"}, - {ORTH: "Tors.", LEMMA: "Torsdag"}, - {ORTH: "Fre.", LEMMA: "Fredag"}, - {ORTH: "Lör.", LEMMA: "Lördag"}, - {ORTH: "sthlm", LEMMA: "Stockholm"}, - {ORTH: "gbg", LEMMA: "Göteborg"}, + {ORTH: "jan.", NORM: "januari"}, + {ORTH: "febr.", NORM: "februari"}, + {ORTH: "feb.", NORM: "februari"}, + {ORTH: "apr.", NORM: "april"}, + {ORTH: "jun.", NORM: "juni"}, + {ORTH: "aug.", NORM: "augusti"}, + {ORTH: "sept.", NORM: "september"}, + {ORTH: "sep.", NORM: "september"}, + {ORTH: "okt.", NORM: "oktober"}, + {ORTH: "nov.", NORM: "november"}, + {ORTH: "dec.", NORM: "december"}, + {ORTH: "mån.", NORM: "måndag"}, + {ORTH: "tis.", NORM: "tisdag"}, + {ORTH: "ons.", NORM: "onsdag"}, + {ORTH: "tors.", NORM: "torsdag"}, + {ORTH: "fre.", NORM: "fredag"}, + {ORTH: "lör.", NORM: "lördag"}, + {ORTH: "Jan.", NORM: "Januari"}, + {ORTH: "Febr.", NORM: "Februari"}, + {ORTH: 
"Feb.", NORM: "Februari"}, + {ORTH: "Apr.", NORM: "April"}, + {ORTH: "Jun.", NORM: "Juni"}, + {ORTH: "Aug.", NORM: "Augusti"}, + {ORTH: "Sept.", NORM: "September"}, + {ORTH: "Sep.", NORM: "September"}, + {ORTH: "Okt.", NORM: "Oktober"}, + {ORTH: "Nov.", NORM: "November"}, + {ORTH: "Dec.", NORM: "December"}, + {ORTH: "Mån.", NORM: "Måndag"}, + {ORTH: "Tis.", NORM: "Tisdag"}, + {ORTH: "Ons.", NORM: "Onsdag"}, + {ORTH: "Tors.", NORM: "Torsdag"}, + {ORTH: "Fre.", NORM: "Fredag"}, + {ORTH: "Lör.", NORM: "Lördag"}, + {ORTH: "sthlm", NORM: "Stockholm"}, + {ORTH: "gbg", NORM: "Göteborg"}, ]: _exc[exc_data[ORTH]] = [exc_data] @@ -154,6 +151,6 @@ for orth in ABBREVIATIONS: # Sentences ending in "i." (as in "... peka i."), "m." (as in "...än 2000 m."), # should be tokenized as two separate tokens. for orth in ["i", "m"]: - _exc[orth + "."] = [{ORTH: orth, LEMMA: orth, NORM: orth}, {ORTH: "."}] + _exc[orth + "."] = [{ORTH: orth, NORM: orth}, {ORTH: "."}] TOKENIZER_EXCEPTIONS = update_exc(BASE_EXCEPTIONS, _exc) diff --git a/spacy/lang/th/tokenizer_exceptions.py b/spacy/lang/th/tokenizer_exceptions.py index 0529b3a99..92116d474 100644 --- a/spacy/lang/th/tokenizer_exceptions.py +++ b/spacy/lang/th/tokenizer_exceptions.py @@ -1,469 +1,438 @@ -from ...symbols import ORTH, LEMMA +from ...symbols import ORTH _exc = { # หน่วยงานรัฐ / government agency - "กกต.": [{ORTH: "กกต.", LEMMA: "คณะกรรมการการเลือกตั้ง"}], - "กทท.": [{ORTH: "กทท.", LEMMA: "การท่าเรือแห่งประเทศไทย"}], - "กทพ.": [{ORTH: "กทพ.", LEMMA: "การทางพิเศษแห่งประเทศไทย"}], - "กบข.": [{ORTH: "กบข.", LEMMA: "กองทุนบำเหน็จบำนาญข้าราชการพลเรือน"}], - "กบว.": [{ORTH: "กบว.", LEMMA: "คณะกรรมการบริหารวิทยุกระจายเสียงและวิทยุโทรทัศน์"}], - "กปน.": [{ORTH: "กปน.", LEMMA: "การประปานครหลวง"}], - "กปภ.": [{ORTH: "กปภ.", LEMMA: "การประปาส่วนภูมิภาค"}], - "กปส.": [{ORTH: "กปส.", LEMMA: "กรมประชาสัมพันธ์"}], - "กผม.": [{ORTH: "กผม.", LEMMA: "กองผังเมือง"}], - "กฟน.": [{ORTH: "กฟน.", LEMMA: "การไฟฟ้านครหลวง"}], - "กฟผ.": 
[{ORTH: "กฟผ.", LEMMA: "การไฟฟ้าฝ่ายผลิตแห่งประเทศไทย"}], - "กฟภ.": [{ORTH: "กฟภ.", LEMMA: "การไฟฟ้าส่วนภูมิภาค"}], - "ก.ช.น.": [{ORTH: "ก.ช.น.", LEMMA: "คณะกรรมการช่วยเหลือชาวนาชาวไร่"}], - "กยศ.": [{ORTH: "กยศ.", LEMMA: "กองทุนเงินให้กู้ยืมเพื่อการศึกษา"}], - "ก.ล.ต.": [{ORTH: "ก.ล.ต.", LEMMA: "คณะกรรมการกำกับหลักทรัพย์และตลาดหลักทรัพย์"}], - "กศ.บ.": [{ORTH: "กศ.บ.", LEMMA: "การศึกษาบัณฑิต"}], - "กศน.": [{ORTH: "กศน.", LEMMA: "กรมการศึกษานอกโรงเรียน"}], - "กสท.": [{ORTH: "กสท.", LEMMA: "การสื่อสารแห่งประเทศไทย"}], - "กอ.รมน.": [{ORTH: "กอ.รมน.", LEMMA: "กองอำนวยการรักษาความมั่นคงภายใน"}], - "กร.": [{ORTH: "กร.", LEMMA: "กองเรือยุทธการ"}], - "ขสมก.": [{ORTH: "ขสมก.", LEMMA: "องค์การขนส่งมวลชนกรุงเทพ"}], - "คตง.": [{ORTH: "คตง.", LEMMA: "คณะกรรมการตรวจเงินแผ่นดิน"}], - "ครม.": [{ORTH: "ครม.", LEMMA: "คณะรัฐมนตรี"}], - "คมช.": [{ORTH: "คมช.", LEMMA: "คณะมนตรีความมั่นคงแห่งชาติ"}], - "ตชด.": [{ORTH: "ตชด.", LEMMA: "ตำรวจตะเวนชายเดน"}], - "ตม.": [{ORTH: "ตม.", LEMMA: "กองตรวจคนเข้าเมือง"}], - "ตร.": [{ORTH: "ตร.", LEMMA: "ตำรวจ"}], - "ททท.": [{ORTH: "ททท.", LEMMA: "การท่องเที่ยวแห่งประเทศไทย"}], - "ททบ.": [{ORTH: "ททบ.", LEMMA: "สถานีวิทยุโทรทัศน์กองทัพบก"}], - "ทบ.": [{ORTH: "ทบ.", LEMMA: "กองทัพบก"}], - "ทร.": [{ORTH: "ทร.", LEMMA: "กองทัพเรือ"}], - "ทอ.": [{ORTH: "ทอ.", LEMMA: "กองทัพอากาศ"}], - "ทอท.": [{ORTH: "ทอท.", LEMMA: "การท่าอากาศยานแห่งประเทศไทย"}], - "ธ.ก.ส.": [{ORTH: "ธ.ก.ส.", LEMMA: "ธนาคารเพื่อการเกษตรและสหกรณ์การเกษตร"}], - "ธปท.": [{ORTH: "ธปท.", LEMMA: "ธนาคารแห่งประเทศไทย"}], - "ธอส.": [{ORTH: "ธอส.", LEMMA: "ธนาคารอาคารสงเคราะห์"}], - "นย.": [{ORTH: "นย.", LEMMA: "นาวิกโยธิน"}], - "ปตท.": [{ORTH: "ปตท.", LEMMA: "การปิโตรเลียมแห่งประเทศไทย"}], - "ป.ป.ช.": [ - { - ORTH: "ป.ป.ช.", - LEMMA: "คณะกรรมการป้องกันและปราบปรามการทุจริตและประพฤติมิชอบในวงราชการ", - } - ], - "ป.ป.ส.": [{ORTH: "ป.ป.ส.", LEMMA: "คณะกรรมการป้องกันและปราบปรามยาเสพติด"}], - "บพร.": [{ORTH: "บพร.", LEMMA: "กรมการบินพลเรือน"}], - "บย.": [{ORTH: "บย.", LEMMA: "กองบินยุทธการ"}], - 
"พสวท.": [ - { - ORTH: "พสวท.", - LEMMA: "โครงการพัฒนาและส่งเสริมผู้มีความรู้ความสามารถพิเศษทางวิทยาศาสตร์และเทคโนโลยี", - } - ], - "มอก.": [{ORTH: "มอก.", LEMMA: "สำนักงานมาตรฐานผลิตภัณฑ์อุตสาหกรรม"}], - "ยธ.": [{ORTH: "ยธ.", LEMMA: "กรมโยธาธิการ"}], - "รพช.": [{ORTH: "รพช.", LEMMA: "สำนักงานเร่งรัดพัฒนาชนบท"}], - "รฟท.": [{ORTH: "รฟท.", LEMMA: "การรถไฟแห่งประเทศไทย"}], - "รฟม.": [{ORTH: "รฟม.", LEMMA: "การรถไฟฟ้าขนส่งมวลชนแห่งประเทศไทย"}], - "ศธ.": [{ORTH: "ศธ.", LEMMA: "กระทรวงศึกษาธิการ"}], - "ศนธ.": [{ORTH: "ศนธ.", LEMMA: "ศูนย์กลางนิสิตนักศึกษาแห่งประเทศไทย"}], - "สกจ.": [{ORTH: "สกจ.", LEMMA: "สหกรณ์จังหวัด"}], - "สกท.": [{ORTH: "สกท.", LEMMA: "สำนักงานคณะกรรมการส่งเสริมการลงทุน"}], - "สกว.": [{ORTH: "สกว.", LEMMA: "สำนักงานกองทุนสนับสนุนการวิจัย"}], - "สคบ.": [{ORTH: "สคบ.", LEMMA: "สำนักงานคณะกรรมการคุ้มครองผู้บริโภค"}], - "สจร.": [{ORTH: "สจร.", LEMMA: "สำนักงานคณะกรรมการจัดระบบการจราจรทางบก"}], - "สตง.": [{ORTH: "สตง.", LEMMA: "สำนักงานตรวจเงินแผ่นดิน"}], - "สทท.": [{ORTH: "สทท.", LEMMA: "สถานีวิทยุโทรทัศน์แห่งประเทศไทย"}], - "สทร.": [{ORTH: "สทร.", LEMMA: "สำนักงานกลางทะเบียนราษฎร์"}], - "สธ": [{ORTH: "สธ", LEMMA: "กระทรวงสาธารณสุข"}], - "สนช.": [{ORTH: "สนช.", LEMMA: "สภานิติบัญญัติแห่งชาติ,สำนักงานนวัตกรรมแห่งชาติ"}], - "สนนท.": [{ORTH: "สนนท.", LEMMA: "สหพันธ์นิสิตนักศึกษาแห่งประเทศไทย"}], - "สปก.": [{ORTH: "สปก.", LEMMA: "สำนักงานการปฏิรูปที่ดินเพื่อเกษตรกรรม"}], - "สปช.": [{ORTH: "สปช.", LEMMA: "สำนักงานคณะกรรมการการประถมศึกษาแห่งชาติ"}], - "สปอ.": [{ORTH: "สปอ.", LEMMA: "สำนักงานการประถมศึกษาอำเภอ"}], - "สพช.": [{ORTH: "สพช.", LEMMA: "สำนักงานคณะกรรมการนโยบายพลังงานแห่งชาติ"}], - "สยช.": [ - {ORTH: "สยช.", LEMMA: "สำนักงานคณะกรรมการส่งเสริมและประสานงานเยาวชนแห่งชาติ"} - ], - "สวช.": [{ORTH: "สวช.", LEMMA: "สำนักงานคณะกรรมการวัฒนธรรมแห่งชาติ"}], - "สวท.": [{ORTH: "สวท.", LEMMA: "สถานีวิทยุกระจายเสียงแห่งประเทศไทย"}], - "สวทช.": [{ORTH: "สวทช.", LEMMA: "สำนักงานพัฒนาวิทยาศาสตร์และเทคโนโลยีแห่งชาติ"}], - "สคช.": [ - {ORTH: "สคช.", LEMMA: 
"สำนักงานคณะกรรมการพัฒนาการเศรษฐกิจและสังคมแห่งชาติ"} - ], - "สสว.": [{ORTH: "สสว.", LEMMA: "สำนักงานส่งเสริมวิสาหกิจขนาดกลางและขนาดย่อม"}], - "สสส.": [{ORTH: "สสส.", LEMMA: "สำนักงานกองทุนสนับสนุนการสร้างเสริมสุขภาพ"}], - "สสวท.": [{ORTH: "สสวท.", LEMMA: "สถาบันส่งเสริมการสอนวิทยาศาสตร์และเทคโนโลยี"}], - "อตก.": [{ORTH: "อตก.", LEMMA: "องค์การตลาดเพื่อเกษตรกร"}], - "อบจ.": [{ORTH: "อบจ.", LEMMA: "องค์การบริหารส่วนจังหวัด"}], - "อบต.": [{ORTH: "อบต.", LEMMA: "องค์การบริหารส่วนตำบล"}], - "อปพร.": [{ORTH: "อปพร.", LEMMA: "อาสาสมัครป้องกันภัยฝ่ายพลเรือน"}], - "อย.": [{ORTH: "อย.", LEMMA: "สำนักงานคณะกรรมการอาหารและยา"}], - "อ.ส.ม.ท.": [{ORTH: "อ.ส.ม.ท.", LEMMA: "องค์การสื่อสารมวลชนแห่งประเทศไทย"}], + "กกต.": [{ORTH: "กกต."}], + "กทท.": [{ORTH: "กทท."}], + "กทพ.": [{ORTH: "กทพ."}], + "กบข.": [{ORTH: "กบข."}], + "กบว.": [{ORTH: "กบว."}], + "กปน.": [{ORTH: "กปน."}], + "กปภ.": [{ORTH: "กปภ."}], + "กปส.": [{ORTH: "กปส."}], + "กผม.": [{ORTH: "กผม."}], + "กฟน.": [{ORTH: "กฟน."}], + "กฟผ.": [{ORTH: "กฟผ."}], + "กฟภ.": [{ORTH: "กฟภ."}], + "ก.ช.น.": [{ORTH: "ก.ช.น."}], + "กยศ.": [{ORTH: "กยศ."}], + "ก.ล.ต.": [{ORTH: "ก.ล.ต."}], + "กศ.บ.": [{ORTH: "กศ.บ."}], + "กศน.": [{ORTH: "กศน."}], + "กสท.": [{ORTH: "กสท."}], + "กอ.รมน.": [{ORTH: "กอ.รมน."}], + "กร.": [{ORTH: "กร."}], + "ขสมก.": [{ORTH: "ขสมก."}], + "คตง.": [{ORTH: "คตง."}], + "ครม.": [{ORTH: "ครม."}], + "คมช.": [{ORTH: "คมช."}], + "ตชด.": [{ORTH: "ตชด."}], + "ตม.": [{ORTH: "ตม."}], + "ตร.": [{ORTH: "ตร."}], + "ททท.": [{ORTH: "ททท."}], + "ททบ.": [{ORTH: "ททบ."}], + "ทบ.": [{ORTH: "ทบ."}], + "ทร.": [{ORTH: "ทร."}], + "ทอ.": [{ORTH: "ทอ."}], + "ทอท.": [{ORTH: "ทอท."}], + "ธ.ก.ส.": [{ORTH: "ธ.ก.ส."}], + "ธปท.": [{ORTH: "ธปท."}], + "ธอส.": [{ORTH: "ธอส."}], + "นย.": [{ORTH: "นย."}], + "ปตท.": [{ORTH: "ปตท."}], + "ป.ป.ช.": [{ORTH: "ป.ป.ช."}], + "ป.ป.ส.": [{ORTH: "ป.ป.ส."}], + "บพร.": [{ORTH: "บพร."}], + "บย.": [{ORTH: "บย."}], + "พสวท.": [{ORTH: "พสวท."}], + "มอก.": [{ORTH: "มอก."}], + "ยธ.": [{ORTH: "ยธ."}], + "รพช.": [{ORTH: 
"รพช."}], + "รฟท.": [{ORTH: "รฟท."}], + "รฟม.": [{ORTH: "รฟม."}], + "ศธ.": [{ORTH: "ศธ."}], + "ศนธ.": [{ORTH: "ศนธ."}], + "สกจ.": [{ORTH: "สกจ."}], + "สกท.": [{ORTH: "สกท."}], + "สกว.": [{ORTH: "สกว."}], + "สคบ.": [{ORTH: "สคบ."}], + "สจร.": [{ORTH: "สจร."}], + "สตง.": [{ORTH: "สตง."}], + "สทท.": [{ORTH: "สทท."}], + "สทร.": [{ORTH: "สทร."}], + "สธ": [{ORTH: "สธ"}], + "สนช.": [{ORTH: "สนช."}], + "สนนท.": [{ORTH: "สนนท."}], + "สปก.": [{ORTH: "สปก."}], + "สปช.": [{ORTH: "สปช."}], + "สปอ.": [{ORTH: "สปอ."}], + "สพช.": [{ORTH: "สพช."}], + "สยช.": [{ORTH: "สยช."}], + "สวช.": [{ORTH: "สวช."}], + "สวท.": [{ORTH: "สวท."}], + "สวทช.": [{ORTH: "สวทช."}], + "สคช.": [{ORTH: "สคช."}], + "สสว.": [{ORTH: "สสว."}], + "สสส.": [{ORTH: "สสส."}], + "สสวท.": [{ORTH: "สสวท."}], + "อตก.": [{ORTH: "อตก."}], + "อบจ.": [{ORTH: "อบจ."}], + "อบต.": [{ORTH: "อบต."}], + "อปพร.": [{ORTH: "อปพร."}], + "อย.": [{ORTH: "อย."}], + "อ.ส.ม.ท.": [{ORTH: "อ.ส.ม.ท."}], # มหาวิทยาลัย / สถานศึกษา / university / college - "มทส.": [{ORTH: "มทส.", LEMMA: "มหาวิทยาลัยเทคโนโลยีสุรนารี"}], - "มธ.": [{ORTH: "มธ.", LEMMA: "มหาวิทยาลัยธรรมศาสตร์"}], - "ม.อ.": [{ORTH: "ม.อ.", LEMMA: "มหาวิทยาลัยสงขลานครินทร์"}], - "มทร.": [{ORTH: "มทร.", LEMMA: "มหาวิทยาลัยเทคโนโลยีราชมงคล"}], - "มมส.": [{ORTH: "มมส.", LEMMA: "มหาวิทยาลัยมหาสารคาม"}], - "วท.": [{ORTH: "วท.", LEMMA: "วิทยาลัยเทคนิค"}], - "สตม.": [{ORTH: "สตม.", LEMMA: "สำนักงานตรวจคนเข้าเมือง (ตำรวจ)"}], + "มทส.": [{ORTH: "มทส."}], + "มธ.": [{ORTH: "มธ."}], + "ม.อ.": [{ORTH: "ม.อ."}], + "มทร.": [{ORTH: "มทร."}], + "มมส.": [{ORTH: "มมส."}], + "วท.": [{ORTH: "วท."}], + "สตม.": [{ORTH: "สตม."}], # ยศ / rank - "ดร.": [{ORTH: "ดร.", LEMMA: "ดอกเตอร์"}], - "ด.ต.": [{ORTH: "ด.ต.", LEMMA: "ดาบตำรวจ"}], - "จ.ต.": [{ORTH: "จ.ต.", LEMMA: "จ่าตรี"}], - "จ.ท.": [{ORTH: "จ.ท.", LEMMA: "จ่าโท"}], - "จ.ส.ต.": [{ORTH: "จ.ส.ต.", LEMMA: "จ่าสิบตรี (ทหารบก)"}], - "จสต.": [{ORTH: "จสต.", LEMMA: "จ่าสิบตำรวจ"}], - "จ.ส.ท.": [{ORTH: "จ.ส.ท.", LEMMA: "จ่าสิบโท"}], - "จ.ส.อ.": [{ORTH: 
"จ.ส.อ.", LEMMA: "จ่าสิบเอก"}], - "จ.อ.": [{ORTH: "จ.อ.", LEMMA: "จ่าเอก"}], - "ทพญ.": [{ORTH: "ทพญ.", LEMMA: "ทันตแพทย์หญิง"}], - "ทนพ.": [{ORTH: "ทนพ.", LEMMA: "เทคนิคการแพทย์"}], - "นจอ.": [{ORTH: "นจอ.", LEMMA: "นักเรียนจ่าอากาศ"}], - "น.ช.": [{ORTH: "น.ช.", LEMMA: "นักโทษชาย"}], - "น.ญ.": [{ORTH: "น.ญ.", LEMMA: "นักโทษหญิง"}], - "น.ต.": [{ORTH: "น.ต.", LEMMA: "นาวาตรี"}], - "น.ท.": [{ORTH: "น.ท.", LEMMA: "นาวาโท"}], - "นตท.": [{ORTH: "นตท.", LEMMA: "นักเรียนเตรียมทหาร"}], - "นนส.": [{ORTH: "นนส.", LEMMA: "นักเรียนนายสิบทหารบก"}], - "นนร.": [{ORTH: "นนร.", LEMMA: "นักเรียนนายร้อย"}], - "นนอ.": [{ORTH: "นนอ.", LEMMA: "นักเรียนนายเรืออากาศ"}], - "นพ.": [{ORTH: "นพ.", LEMMA: "นายแพทย์"}], - "นพท.": [{ORTH: "นพท.", LEMMA: "นายแพทย์ทหาร"}], - "นรจ.": [{ORTH: "นรจ.", LEMMA: "นักเรียนจ่าทหารเรือ"}], - "นรต.": [{ORTH: "นรต.", LEMMA: "นักเรียนนายร้อยตำรวจ"}], - "นศพ.": [{ORTH: "นศพ.", LEMMA: "นักศึกษาแพทย์"}], - "นศท.": [{ORTH: "นศท.", LEMMA: "นักศึกษาวิชาทหาร"}], - "น.สพ.": [{ORTH: "น.สพ.", LEMMA: "นายสัตวแพทย์ (พ.ร.บ.วิชาชีพการสัตวแพทย์)"}], - "น.อ.": [{ORTH: "น.อ.", LEMMA: "นาวาเอก"}], - "บช.ก.": [{ORTH: "บช.ก.", LEMMA: "กองบัญชาการตำรวจสอบสวนกลาง"}], - "บช.น.": [{ORTH: "บช.น.", LEMMA: "กองบัญชาการตำรวจนครบาล"}], - "ผกก.": [{ORTH: "ผกก.", LEMMA: "ผู้กำกับการ"}], - "ผกก.ภ.": [{ORTH: "ผกก.ภ.", LEMMA: "ผู้กำกับการตำรวจภูธร"}], - "ผจก.": [{ORTH: "ผจก.", LEMMA: "ผู้จัดการ"}], - "ผช.": [{ORTH: "ผช.", LEMMA: "ผู้ช่วย"}], - "ผชก.": [{ORTH: "ผชก.", LEMMA: "ผู้ชำนาญการ"}], - "ผช.ผอ.": [{ORTH: "ผช.ผอ.", LEMMA: "ผู้ช่วยผู้อำนวยการ"}], - "ผญบ.": [{ORTH: "ผญบ.", LEMMA: "ผู้ใหญ่บ้าน"}], - "ผบ.": [{ORTH: "ผบ.", LEMMA: "ผู้บังคับบัญชา"}], - "ผบก.": [{ORTH: "ผบก.", LEMMA: "ผู้บังคับบัญชาการ (ตำรวจ)"}], - "ผบก.น.": [{ORTH: "ผบก.น.", LEMMA: "ผู้บังคับการตำรวจนครบาล"}], - "ผบก.ป.": [{ORTH: "ผบก.ป.", LEMMA: "ผู้บังคับการตำรวจกองปราบปราม"}], - "ผบก.ปค.": [ - { - ORTH: "ผบก.ปค.", - LEMMA: "ผู้บังคับการ กองบังคับการปกครอง (โรงเรียนนายร้อยตำรวจ)", - } - ], - "ผบก.ปม.": [{ORTH: "ผบก.ปม.", 
LEMMA: "ผู้บังคับการตำรวจป่าไม้"}], - "ผบก.ภ.": [{ORTH: "ผบก.ภ.", LEMMA: "ผู้บังคับการตำรวจภูธร"}], - "ผบช.": [{ORTH: "ผบช.", LEMMA: "ผู้บัญชาการ (ตำรวจ)"}], - "ผบช.ก.": [{ORTH: "ผบช.ก.", LEMMA: "ผู้บัญชาการตำรวจสอบสวนกลาง"}], - "ผบช.ตชด.": [{ORTH: "ผบช.ตชด.", LEMMA: "ผู้บัญชาการตำรวจตระเวนชายแดน"}], - "ผบช.น.": [{ORTH: "ผบช.น.", LEMMA: "ผู้บัญชาการตำรวจนครบาล"}], - "ผบช.ภ.": [{ORTH: "ผบช.ภ.", LEMMA: "ผู้บัญชาการตำรวจภูธร"}], - "ผบ.ทบ.": [{ORTH: "ผบ.ทบ.", LEMMA: "ผู้บัญชาการทหารบก"}], - "ผบ.ตร.": [{ORTH: "ผบ.ตร.", LEMMA: "ผู้บัญชาการตำรวจแห่งชาติ"}], - "ผบ.ทร.": [{ORTH: "ผบ.ทร.", LEMMA: "ผู้บัญชาการทหารเรือ"}], - "ผบ.ทอ.": [{ORTH: "ผบ.ทอ.", LEMMA: "ผู้บัญชาการทหารอากาศ"}], - "ผบ.ทสส.": [{ORTH: "ผบ.ทสส.", LEMMA: "ผู้บัญชาการทหารสูงสุด"}], - "ผวจ.": [{ORTH: "ผวจ.", LEMMA: "ผู้ว่าราชการจังหวัด"}], - "ผู้ว่าฯ": [{ORTH: "ผู้ว่าฯ", LEMMA: "ผู้ว่าราชการจังหวัด"}], - "พ.จ.ต.": [{ORTH: "พ.จ.ต.", LEMMA: "พันจ่าตรี"}], - "พ.จ.ท.": [{ORTH: "พ.จ.ท.", LEMMA: "พันจ่าโท"}], - "พ.จ.อ.": [{ORTH: "พ.จ.อ.", LEMMA: "พันจ่าเอก"}], - "พญ.": [{ORTH: "พญ.", LEMMA: "แพทย์หญิง"}], - "ฯพณฯ": [{ORTH: "ฯพณฯ", LEMMA: "พณท่าน"}], - "พ.ต.": [{ORTH: "พ.ต.", LEMMA: "พันตรี"}], - "พ.ท.": [{ORTH: "พ.ท.", LEMMA: "พันโท"}], - "พ.อ.": [{ORTH: "พ.อ.", LEMMA: "พันเอก"}], - "พ.ต.อ.พิเศษ": [{ORTH: "พ.ต.อ.พิเศษ", LEMMA: "พันตำรวจเอกพิเศษ"}], - "พลฯ": [{ORTH: "พลฯ", LEMMA: "พลทหาร"}], - "พล.๑ รอ.": [{ORTH: "พล.๑ รอ.", LEMMA: "กองพลที่ ๑ รักษาพระองค์ กองทัพบก"}], - "พล.ต.": [{ORTH: "พล.ต.", LEMMA: "พลตรี"}], - "พล.ต.ต.": [{ORTH: "พล.ต.ต.", LEMMA: "พลตำรวจตรี"}], - "พล.ต.ท.": [{ORTH: "พล.ต.ท.", LEMMA: "พลตำรวจโท"}], - "พล.ต.อ.": [{ORTH: "พล.ต.อ.", LEMMA: "พลตำรวจเอก"}], - "พล.ท.": [{ORTH: "พล.ท.", LEMMA: "พลโท"}], - "พล.ปตอ.": [{ORTH: "พล.ปตอ.", LEMMA: "กองพลทหารปืนใหญ่ต่อสู่อากาศยาน"}], - "พล.ม.": [{ORTH: "พล.ม.", LEMMA: "กองพลทหารม้า"}], - "พล.ม.๒": [{ORTH: "พล.ม.๒", LEMMA: "กองพลทหารม้าที่ ๒"}], - "พล.ร.ต.": [{ORTH: "พล.ร.ต.", LEMMA: "พลเรือตรี"}], - "พล.ร.ท.": [{ORTH: "พล.ร.ท.", LEMMA: "พลเรือโท"}], - 
"พล.ร.อ.": [{ORTH: "พล.ร.อ.", LEMMA: "พลเรือเอก"}], - "พล.อ.": [{ORTH: "พล.อ.", LEMMA: "พลเอก"}], - "พล.อ.ต.": [{ORTH: "พล.อ.ต.", LEMMA: "พลอากาศตรี"}], - "พล.อ.ท.": [{ORTH: "พล.อ.ท.", LEMMA: "พลอากาศโท"}], - "พล.อ.อ.": [{ORTH: "พล.อ.อ.", LEMMA: "พลอากาศเอก"}], - "พ.อ.พิเศษ": [{ORTH: "พ.อ.พิเศษ", LEMMA: "พันเอกพิเศษ"}], - "พ.อ.ต.": [{ORTH: "พ.อ.ต.", LEMMA: "พันจ่าอากาศตรี"}], - "พ.อ.ท.": [{ORTH: "พ.อ.ท.", LEMMA: "พันจ่าอากาศโท"}], - "พ.อ.อ.": [{ORTH: "พ.อ.อ.", LEMMA: "พันจ่าอากาศเอก"}], - "ภกญ.": [{ORTH: "ภกญ.", LEMMA: "เภสัชกรหญิง"}], - "ม.จ.": [{ORTH: "ม.จ.", LEMMA: "หม่อมเจ้า"}], - "มท1": [{ORTH: "มท1", LEMMA: "รัฐมนตรีว่าการกระทรวงมหาดไทย"}], - "ม.ร.ว.": [{ORTH: "ม.ร.ว.", LEMMA: "หม่อมราชวงศ์"}], - "มล.": [{ORTH: "มล.", LEMMA: "หม่อมหลวง"}], - "ร.ต.": [{ORTH: "ร.ต.", LEMMA: "ร้อยตรี,เรือตรี,เรืออากาศตรี"}], - "ร.ต.ต.": [{ORTH: "ร.ต.ต.", LEMMA: "ร้อยตำรวจตรี"}], - "ร.ต.ท.": [{ORTH: "ร.ต.ท.", LEMMA: "ร้อยตำรวจโท"}], - "ร.ต.อ.": [{ORTH: "ร.ต.อ.", LEMMA: "ร้อยตำรวจเอก"}], - "ร.ท.": [{ORTH: "ร.ท.", LEMMA: "ร้อยโท,เรือโท,เรืออากาศโท"}], - "รมช.": [{ORTH: "รมช.", LEMMA: "รัฐมนตรีช่วยว่าการกระทรวง"}], - "รมต.": [{ORTH: "รมต.", LEMMA: "รัฐมนตรี"}], - "รมว.": [{ORTH: "รมว.", LEMMA: "รัฐมนตรีว่าการกระทรวง"}], - "รศ.": [{ORTH: "รศ.", LEMMA: "รองศาสตราจารย์"}], - "ร.อ.": [{ORTH: "ร.อ.", LEMMA: "ร้อยเอก,เรือเอก,เรืออากาศเอก"}], - "ศ.": [{ORTH: "ศ.", LEMMA: "ศาสตราจารย์"}], - "ส.ต.": [{ORTH: "ส.ต.", LEMMA: "สิบตรี"}], - "ส.ต.ต.": [{ORTH: "ส.ต.ต.", LEMMA: "สิบตำรวจตรี"}], - "ส.ต.ท.": [{ORTH: "ส.ต.ท.", LEMMA: "สิบตำรวจโท"}], - "ส.ต.อ.": [{ORTH: "ส.ต.อ.", LEMMA: "สิบตำรวจเอก"}], - "ส.ท.": [{ORTH: "ส.ท.", LEMMA: "สิบโท"}], - "สพ.": [{ORTH: "สพ.", LEMMA: "สัตวแพทย์"}], - "สพ.ญ.": [{ORTH: "สพ.ญ.", LEMMA: "สัตวแพทย์หญิง"}], - "สพ.ช.": [{ORTH: "สพ.ช.", LEMMA: "สัตวแพทย์ชาย"}], - "ส.อ.": [{ORTH: "ส.อ.", LEMMA: "สิบเอก"}], - "อจ.": [{ORTH: "อจ.", LEMMA: "อาจารย์"}], - "อจญ.": [{ORTH: "อจญ.", LEMMA: "อาจารย์ใหญ่"}], + "ดร.": [{ORTH: "ดร."}], + "ด.ต.": [{ORTH: "ด.ต."}], + "จ.ต.": [{ORTH: 
"จ.ต."}], + "จ.ท.": [{ORTH: "จ.ท."}], + "จ.ส.ต.": [{ORTH: "จ.ส.ต."}], + "จสต.": [{ORTH: "จสต."}], + "จ.ส.ท.": [{ORTH: "จ.ส.ท."}], + "จ.ส.อ.": [{ORTH: "จ.ส.อ."}], + "จ.อ.": [{ORTH: "จ.อ."}], + "ทพญ.": [{ORTH: "ทพญ."}], + "ทนพ.": [{ORTH: "ทนพ."}], + "นจอ.": [{ORTH: "นจอ."}], + "น.ช.": [{ORTH: "น.ช."}], + "น.ญ.": [{ORTH: "น.ญ."}], + "น.ต.": [{ORTH: "น.ต."}], + "น.ท.": [{ORTH: "น.ท."}], + "นตท.": [{ORTH: "นตท."}], + "นนส.": [{ORTH: "นนส."}], + "นนร.": [{ORTH: "นนร."}], + "นนอ.": [{ORTH: "นนอ."}], + "นพ.": [{ORTH: "นพ."}], + "นพท.": [{ORTH: "นพท."}], + "นรจ.": [{ORTH: "นรจ."}], + "นรต.": [{ORTH: "นรต."}], + "นศพ.": [{ORTH: "นศพ."}], + "นศท.": [{ORTH: "นศท."}], + "น.สพ.": [{ORTH: "น.สพ."}], + "น.อ.": [{ORTH: "น.อ."}], + "บช.ก.": [{ORTH: "บช.ก."}], + "บช.น.": [{ORTH: "บช.น."}], + "ผกก.": [{ORTH: "ผกก."}], + "ผกก.ภ.": [{ORTH: "ผกก.ภ."}], + "ผจก.": [{ORTH: "ผจก."}], + "ผช.": [{ORTH: "ผช."}], + "ผชก.": [{ORTH: "ผชก."}], + "ผช.ผอ.": [{ORTH: "ผช.ผอ."}], + "ผญบ.": [{ORTH: "ผญบ."}], + "ผบ.": [{ORTH: "ผบ."}], + "ผบก.": [{ORTH: "ผบก."}], + "ผบก.น.": [{ORTH: "ผบก.น."}], + "ผบก.ป.": [{ORTH: "ผบก.ป."}], + "ผบก.ปค.": [{ORTH: "ผบก.ปค."}], + "ผบก.ปม.": [{ORTH: "ผบก.ปม."}], + "ผบก.ภ.": [{ORTH: "ผบก.ภ."}], + "ผบช.": [{ORTH: "ผบช."}], + "ผบช.ก.": [{ORTH: "ผบช.ก."}], + "ผบช.ตชด.": [{ORTH: "ผบช.ตชด."}], + "ผบช.น.": [{ORTH: "ผบช.น."}], + "ผบช.ภ.": [{ORTH: "ผบช.ภ."}], + "ผบ.ทบ.": [{ORTH: "ผบ.ทบ."}], + "ผบ.ตร.": [{ORTH: "ผบ.ตร."}], + "ผบ.ทร.": [{ORTH: "ผบ.ทร."}], + "ผบ.ทอ.": [{ORTH: "ผบ.ทอ."}], + "ผบ.ทสส.": [{ORTH: "ผบ.ทสส."}], + "ผวจ.": [{ORTH: "ผวจ."}], + "ผู้ว่าฯ": [{ORTH: "ผู้ว่าฯ"}], + "พ.จ.ต.": [{ORTH: "พ.จ.ต."}], + "พ.จ.ท.": [{ORTH: "พ.จ.ท."}], + "พ.จ.อ.": [{ORTH: "พ.จ.อ."}], + "พญ.": [{ORTH: "พญ."}], + "ฯพณฯ": [{ORTH: "ฯพณฯ"}], + "พ.ต.": [{ORTH: "พ.ต."}], + "พ.ท.": [{ORTH: "พ.ท."}], + "พ.อ.": [{ORTH: "พ.อ."}], + "พ.ต.อ.พิเศษ": [{ORTH: "พ.ต.อ.พิเศษ"}], + "พลฯ": [{ORTH: "พลฯ"}], + "พล.๑ รอ.": [{ORTH: "พล.๑ รอ."}], + "พล.ต.": [{ORTH: "พล.ต."}], + "พล.ต.ต.": [{ORTH: "พล.ต.ต."}], + 
"พล.ต.ท.": [{ORTH: "พล.ต.ท."}], + "พล.ต.อ.": [{ORTH: "พล.ต.อ."}], + "พล.ท.": [{ORTH: "พล.ท."}], + "พล.ปตอ.": [{ORTH: "พล.ปตอ."}], + "พล.ม.": [{ORTH: "พล.ม."}], + "พล.ม.๒": [{ORTH: "พล.ม.๒"}], + "พล.ร.ต.": [{ORTH: "พล.ร.ต."}], + "พล.ร.ท.": [{ORTH: "พล.ร.ท."}], + "พล.ร.อ.": [{ORTH: "พล.ร.อ."}], + "พล.อ.": [{ORTH: "พล.อ."}], + "พล.อ.ต.": [{ORTH: "พล.อ.ต."}], + "พล.อ.ท.": [{ORTH: "พล.อ.ท."}], + "พล.อ.อ.": [{ORTH: "พล.อ.อ."}], + "พ.อ.พิเศษ": [{ORTH: "พ.อ.พิเศษ"}], + "พ.อ.ต.": [{ORTH: "พ.อ.ต."}], + "พ.อ.ท.": [{ORTH: "พ.อ.ท."}], + "พ.อ.อ.": [{ORTH: "พ.อ.อ."}], + "ภกญ.": [{ORTH: "ภกญ."}], + "ม.จ.": [{ORTH: "ม.จ."}], + "มท1": [{ORTH: "มท1"}], + "ม.ร.ว.": [{ORTH: "ม.ร.ว."}], + "มล.": [{ORTH: "มล."}], + "ร.ต.": [{ORTH: "ร.ต."}], + "ร.ต.ต.": [{ORTH: "ร.ต.ต."}], + "ร.ต.ท.": [{ORTH: "ร.ต.ท."}], + "ร.ต.อ.": [{ORTH: "ร.ต.อ."}], + "ร.ท.": [{ORTH: "ร.ท."}], + "รมช.": [{ORTH: "รมช."}], + "รมต.": [{ORTH: "รมต."}], + "รมว.": [{ORTH: "รมว."}], + "รศ.": [{ORTH: "รศ."}], + "ร.อ.": [{ORTH: "ร.อ."}], + "ศ.": [{ORTH: "ศ."}], + "ส.ต.": [{ORTH: "ส.ต."}], + "ส.ต.ต.": [{ORTH: "ส.ต.ต."}], + "ส.ต.ท.": [{ORTH: "ส.ต.ท."}], + "ส.ต.อ.": [{ORTH: "ส.ต.อ."}], + "ส.ท.": [{ORTH: "ส.ท."}], + "สพ.": [{ORTH: "สพ."}], + "สพ.ญ.": [{ORTH: "สพ.ญ."}], + "สพ.ช.": [{ORTH: "สพ.ช."}], + "ส.อ.": [{ORTH: "ส.อ."}], + "อจ.": [{ORTH: "อจ."}], + "อจญ.": [{ORTH: "อจญ."}], # วุฒิ / bachelor degree - "ป.": [{ORTH: "ป.", LEMMA: "ประถมศึกษา"}], - "ป.กศ.": [{ORTH: "ป.กศ.", LEMMA: "ประกาศนียบัตรวิชาการศึกษา"}], - "ป.กศ.สูง": [{ORTH: "ป.กศ.สูง", LEMMA: "ประกาศนียบัตรวิชาการศึกษาชั้นสูง"}], - "ปวช.": [{ORTH: "ปวช.", LEMMA: "ประกาศนียบัตรวิชาชีพ"}], - "ปวท.": [{ORTH: "ปวท.", LEMMA: "ประกาศนียบัตรวิชาชีพเทคนิค"}], - "ปวส.": [{ORTH: "ปวส.", LEMMA: "ประกาศนียบัตรวิชาชีพชั้นสูง"}], - "ปทส.": [{ORTH: "ปทส.", LEMMA: "ประกาศนียบัตรครูเทคนิคชั้นสูง"}], - "กษ.บ.": [{ORTH: "กษ.บ.", LEMMA: "เกษตรศาสตรบัณฑิต"}], - "กษ.ม.": [{ORTH: "กษ.ม.", LEMMA: "เกษตรศาสตรมหาบัณฑิต"}], - "กษ.ด.": [{ORTH: "กษ.ด.", LEMMA: "เกษตรศาสตรดุษฎีบัณฑิต"}], - "ค.บ.": 
[{ORTH: "ค.บ.", LEMMA: "ครุศาสตรบัณฑิต"}], - "คศ.บ.": [{ORTH: "คศ.บ.", LEMMA: "คหกรรมศาสตรบัณฑิต"}], - "คศ.ม.": [{ORTH: "คศ.ม.", LEMMA: "คหกรรมศาสตรมหาบัณฑิต"}], - "คศ.ด.": [{ORTH: "คศ.ด.", LEMMA: "คหกรรมศาสตรดุษฎีบัณฑิต"}], - "ค.อ.บ.": [{ORTH: "ค.อ.บ.", LEMMA: "ครุศาสตรอุตสาหกรรมบัณฑิต"}], - "ค.อ.ม.": [{ORTH: "ค.อ.ม.", LEMMA: "ครุศาสตรอุตสาหกรรมมหาบัณฑิต"}], - "ค.อ.ด.": [{ORTH: "ค.อ.ด.", LEMMA: "ครุศาสตรอุตสาหกรรมดุษฎีบัณฑิต"}], - "ทก.บ.": [{ORTH: "ทก.บ.", LEMMA: "เทคโนโลยีการเกษตรบัณฑิต"}], - "ทก.ม.": [{ORTH: "ทก.ม.", LEMMA: "เทคโนโลยีการเกษตรมหาบัณฑิต"}], - "ทก.ด.": [{ORTH: "ทก.ด.", LEMMA: "เทคโนโลยีการเกษตรดุษฎีบัณฑิต"}], - "ท.บ.": [{ORTH: "ท.บ.", LEMMA: "ทันตแพทยศาสตรบัณฑิต"}], - "ท.ม.": [{ORTH: "ท.ม.", LEMMA: "ทันตแพทยศาสตรมหาบัณฑิต"}], - "ท.ด.": [{ORTH: "ท.ด.", LEMMA: "ทันตแพทยศาสตรดุษฎีบัณฑิต"}], - "น.บ.": [{ORTH: "น.บ.", LEMMA: "นิติศาสตรบัณฑิต"}], - "น.ม.": [{ORTH: "น.ม.", LEMMA: "นิติศาสตรมหาบัณฑิต"}], - "น.ด.": [{ORTH: "น.ด.", LEMMA: "นิติศาสตรดุษฎีบัณฑิต"}], - "นศ.บ.": [{ORTH: "นศ.บ.", LEMMA: "นิเทศศาสตรบัณฑิต"}], - "นศ.ม.": [{ORTH: "นศ.ม.", LEMMA: "นิเทศศาสตรมหาบัณฑิต"}], - "นศ.ด.": [{ORTH: "นศ.ด.", LEMMA: "นิเทศศาสตรดุษฎีบัณฑิต"}], - "บช.บ.": [{ORTH: "บช.บ.", LEMMA: "บัญชีบัณฑิต"}], - "บช.ม.": [{ORTH: "บช.ม.", LEMMA: "บัญชีมหาบัณฑิต"}], - "บช.ด.": [{ORTH: "บช.ด.", LEMMA: "บัญชีดุษฎีบัณฑิต"}], - "บธ.บ.": [{ORTH: "บธ.บ.", LEMMA: "บริหารธุรกิจบัณฑิต"}], - "บธ.ม.": [{ORTH: "บธ.ม.", LEMMA: "บริหารธุรกิจมหาบัณฑิต"}], - "บธ.ด.": [{ORTH: "บธ.ด.", LEMMA: "บริหารธุรกิจดุษฎีบัณฑิต"}], - "พณ.บ.": [{ORTH: "พณ.บ.", LEMMA: "พาณิชยศาสตรบัณฑิต"}], - "พณ.ม.": [{ORTH: "พณ.ม.", LEMMA: "พาณิชยศาสตรมหาบัณฑิต"}], - "พณ.ด.": [{ORTH: "พณ.ด.", LEMMA: "พาณิชยศาสตรดุษฎีบัณฑิต"}], - "พ.บ.": [{ORTH: "พ.บ.", LEMMA: "แพทยศาสตรบัณฑิต"}], - "พ.ม.": [{ORTH: "พ.ม.", LEMMA: "แพทยศาสตรมหาบัณฑิต"}], - "พ.ด.": [{ORTH: "พ.ด.", LEMMA: "แพทยศาสตรดุษฎีบัณฑิต"}], - "พธ.บ.": [{ORTH: "พธ.บ.", LEMMA: "พุทธศาสตรบัณฑิต"}], - "พธ.ม.": [{ORTH: "พธ.ม.", LEMMA: "พุทธศาสตรมหาบัณฑิต"}], - "พธ.ด.": [{ORTH: 
"พธ.ด.", LEMMA: "พุทธศาสตรดุษฎีบัณฑิต"}], - "พบ.บ.": [{ORTH: "พบ.บ.", LEMMA: "พัฒนบริหารศาสตรบัณฑิต"}], - "พบ.ม.": [{ORTH: "พบ.ม.", LEMMA: "พัฒนบริหารศาสตรมหาบัณฑิต"}], - "พบ.ด.": [{ORTH: "พบ.ด.", LEMMA: "พัฒนบริหารศาสตรดุษฎีบัณฑิต"}], - "พย.บ.": [{ORTH: "พย.บ.", LEMMA: "พยาบาลศาสตรดุษฎีบัณฑิต"}], - "พย.ม.": [{ORTH: "พย.ม.", LEMMA: "พยาบาลศาสตรมหาบัณฑิต"}], - "พย.ด.": [{ORTH: "พย.ด.", LEMMA: "พยาบาลศาสตรดุษฎีบัณฑิต"}], - "พศ.บ.": [{ORTH: "พศ.บ.", LEMMA: "พาณิชยศาสตรบัณฑิต"}], - "พศ.ม.": [{ORTH: "พศ.ม.", LEMMA: "พาณิชยศาสตรมหาบัณฑิต"}], - "พศ.ด.": [{ORTH: "พศ.ด.", LEMMA: "พาณิชยศาสตรดุษฎีบัณฑิต"}], - "ภ.บ.": [{ORTH: "ภ.บ.", LEMMA: "เภสัชศาสตรบัณฑิต"}], - "ภ.ม.": [{ORTH: "ภ.ม.", LEMMA: "เภสัชศาสตรมหาบัณฑิต"}], - "ภ.ด.": [{ORTH: "ภ.ด.", LEMMA: "เภสัชศาสตรดุษฎีบัณฑิต"}], - "ภ.สถ.บ.": [{ORTH: "ภ.สถ.บ.", LEMMA: "ภูมิสถาปัตยกรรมศาสตรบัณฑิต"}], - "รป.บ.": [{ORTH: "รป.บ.", LEMMA: "รัฐประศาสนศาสตร์บัณฑิต"}], - "รป.ม.": [{ORTH: "รป.ม.", LEMMA: "รัฐประศาสนศาสตร์มหาบัณฑิต"}], - "วท.บ.": [{ORTH: "วท.บ.", LEMMA: "วิทยาศาสตรบัณฑิต"}], - "วท.ม.": [{ORTH: "วท.ม.", LEMMA: "วิทยาศาสตรมหาบัณฑิต"}], - "วท.ด.": [{ORTH: "วท.ด.", LEMMA: "วิทยาศาสตรดุษฎีบัณฑิต"}], - "ศ.บ.": [{ORTH: "ศ.บ.", LEMMA: "ศิลปบัณฑิต"}], - "ศศ.บ.": [{ORTH: "ศศ.บ.", LEMMA: "ศิลปศาสตรบัณฑิต"}], - "ศษ.บ.": [{ORTH: "ศษ.บ.", LEMMA: "ศึกษาศาสตรบัณฑิต"}], - "ศส.บ.": [{ORTH: "ศส.บ.", LEMMA: "เศรษฐศาสตรบัณฑิต"}], - "สถ.บ.": [{ORTH: "สถ.บ.", LEMMA: "สถาปัตยกรรมศาสตรบัณฑิต"}], - "สถ.ม.": [{ORTH: "สถ.ม.", LEMMA: "สถาปัตยกรรมศาสตรมหาบัณฑิต"}], - "สถ.ด.": [{ORTH: "สถ.ด.", LEMMA: "สถาปัตยกรรมศาสตรดุษฎีบัณฑิต"}], - "สพ.บ.": [{ORTH: "สพ.บ.", LEMMA: "สัตวแพทยศาสตรบัณฑิต"}], - "อ.บ.": [{ORTH: "อ.บ.", LEMMA: "อักษรศาสตรบัณฑิต"}], - "อ.ม.": [{ORTH: "อ.ม.", LEMMA: "อักษรศาสตรมหาบัณฑิต"}], - "อ.ด.": [{ORTH: "อ.ด.", LEMMA: "อักษรศาสตรดุษฎีบัณฑิต"}], + "ป.": [{ORTH: "ป."}], + "ป.กศ.": [{ORTH: "ป.กศ."}], + "ป.กศ.สูง": [{ORTH: "ป.กศ.สูง"}], + "ปวช.": [{ORTH: "ปวช."}], + "ปวท.": [{ORTH: "ปวท."}], + "ปวส.": [{ORTH: "ปวส."}], + "ปทส.": [{ORTH: 
"ปทส."}], + "กษ.บ.": [{ORTH: "กษ.บ."}], + "กษ.ม.": [{ORTH: "กษ.ม."}], + "กษ.ด.": [{ORTH: "กษ.ด."}], + "ค.บ.": [{ORTH: "ค.บ."}], + "คศ.บ.": [{ORTH: "คศ.บ."}], + "คศ.ม.": [{ORTH: "คศ.ม."}], + "คศ.ด.": [{ORTH: "คศ.ด."}], + "ค.อ.บ.": [{ORTH: "ค.อ.บ."}], + "ค.อ.ม.": [{ORTH: "ค.อ.ม."}], + "ค.อ.ด.": [{ORTH: "ค.อ.ด."}], + "ทก.บ.": [{ORTH: "ทก.บ."}], + "ทก.ม.": [{ORTH: "ทก.ม."}], + "ทก.ด.": [{ORTH: "ทก.ด."}], + "ท.บ.": [{ORTH: "ท.บ."}], + "ท.ม.": [{ORTH: "ท.ม."}], + "ท.ด.": [{ORTH: "ท.ด."}], + "น.บ.": [{ORTH: "น.บ."}], + "น.ม.": [{ORTH: "น.ม."}], + "น.ด.": [{ORTH: "น.ด."}], + "นศ.บ.": [{ORTH: "นศ.บ."}], + "นศ.ม.": [{ORTH: "นศ.ม."}], + "นศ.ด.": [{ORTH: "นศ.ด."}], + "บช.บ.": [{ORTH: "บช.บ."}], + "บช.ม.": [{ORTH: "บช.ม."}], + "บช.ด.": [{ORTH: "บช.ด."}], + "บธ.บ.": [{ORTH: "บธ.บ."}], + "บธ.ม.": [{ORTH: "บธ.ม."}], + "บธ.ด.": [{ORTH: "บธ.ด."}], + "พณ.บ.": [{ORTH: "พณ.บ."}], + "พณ.ม.": [{ORTH: "พณ.ม."}], + "พณ.ด.": [{ORTH: "พณ.ด."}], + "พ.บ.": [{ORTH: "พ.บ."}], + "พ.ม.": [{ORTH: "พ.ม."}], + "พ.ด.": [{ORTH: "พ.ด."}], + "พธ.บ.": [{ORTH: "พธ.บ."}], + "พธ.ม.": [{ORTH: "พธ.ม."}], + "พธ.ด.": [{ORTH: "พธ.ด."}], + "พบ.บ.": [{ORTH: "พบ.บ."}], + "พบ.ม.": [{ORTH: "พบ.ม."}], + "พบ.ด.": [{ORTH: "พบ.ด."}], + "พย.บ.": [{ORTH: "พย.บ."}], + "พย.ม.": [{ORTH: "พย.ม."}], + "พย.ด.": [{ORTH: "พย.ด."}], + "พศ.บ.": [{ORTH: "พศ.บ."}], + "พศ.ม.": [{ORTH: "พศ.ม."}], + "พศ.ด.": [{ORTH: "พศ.ด."}], + "ภ.บ.": [{ORTH: "ภ.บ."}], + "ภ.ม.": [{ORTH: "ภ.ม."}], + "ภ.ด.": [{ORTH: "ภ.ด."}], + "ภ.สถ.บ.": [{ORTH: "ภ.สถ.บ."}], + "รป.บ.": [{ORTH: "รป.บ."}], + "รป.ม.": [{ORTH: "รป.ม."}], + "วท.บ.": [{ORTH: "วท.บ."}], + "วท.ม.": [{ORTH: "วท.ม."}], + "วท.ด.": [{ORTH: "วท.ด."}], + "ศ.บ.": [{ORTH: "ศ.บ."}], + "ศศ.บ.": [{ORTH: "ศศ.บ."}], + "ศษ.บ.": [{ORTH: "ศษ.บ."}], + "ศส.บ.": [{ORTH: "ศส.บ."}], + "สถ.บ.": [{ORTH: "สถ.บ."}], + "สถ.ม.": [{ORTH: "สถ.ม."}], + "สถ.ด.": [{ORTH: "สถ.ด."}], + "สพ.บ.": [{ORTH: "สพ.บ."}], + "อ.บ.": [{ORTH: "อ.บ."}], + "อ.ม.": [{ORTH: "อ.ม."}], + "อ.ด.": [{ORTH: "อ.ด."}], # ปี / เวลา / year / time - 
"ชม.": [{ORTH: "ชม.", LEMMA: "ชั่วโมง"}], - "จ.ศ.": [{ORTH: "จ.ศ.", LEMMA: "จุลศักราช"}], - "ค.ศ.": [{ORTH: "ค.ศ.", LEMMA: "คริสต์ศักราช"}], - "ฮ.ศ.": [{ORTH: "ฮ.ศ.", LEMMA: "ฮิจเราะห์ศักราช"}], - "ว.ด.ป.": [{ORTH: "ว.ด.ป.", LEMMA: "วัน เดือน ปี"}], + "ชม.": [{ORTH: "ชม."}], + "จ.ศ.": [{ORTH: "จ.ศ."}], + "ค.ศ.": [{ORTH: "ค.ศ."}], + "ฮ.ศ.": [{ORTH: "ฮ.ศ."}], + "ว.ด.ป.": [{ORTH: "ว.ด.ป."}], # ระยะทาง / distance - "ฮม.": [{ORTH: "ฮม.", LEMMA: "เฮกโตเมตร"}], - "ดคม.": [{ORTH: "ดคม.", LEMMA: "เดคาเมตร"}], - "ดม.": [{ORTH: "ดม.", LEMMA: "เดซิเมตร"}], - "มม.": [{ORTH: "มม.", LEMMA: "มิลลิเมตร"}], - "ซม.": [{ORTH: "ซม.", LEMMA: "เซนติเมตร"}], - "กม.": [{ORTH: "กม.", LEMMA: "กิโลเมตร"}], + "ฮม.": [{ORTH: "ฮม."}], + "ดคม.": [{ORTH: "ดคม."}], + "ดม.": [{ORTH: "ดม."}], + "มม.": [{ORTH: "มม."}], + "ซม.": [{ORTH: "ซม."}], + "กม.": [{ORTH: "กม."}], # น้ำหนัก / weight - "น.น.": [{ORTH: "น.น.", LEMMA: "น้ำหนัก"}], - "ฮก.": [{ORTH: "ฮก.", LEMMA: "เฮกโตกรัม"}], - "ดคก.": [{ORTH: "ดคก.", LEMMA: "เดคากรัม"}], - "ดก.": [{ORTH: "ดก.", LEMMA: "เดซิกรัม"}], - "ซก.": [{ORTH: "ซก.", LEMMA: "เซนติกรัม"}], - "มก.": [{ORTH: "มก.", LEMMA: "มิลลิกรัม"}], - "ก.": [{ORTH: "ก.", LEMMA: "กรัม"}], - "กก.": [{ORTH: "กก.", LEMMA: "กิโลกรัม"}], + "น.น.": [{ORTH: "น.น."}], + "ฮก.": [{ORTH: "ฮก."}], + "ดคก.": [{ORTH: "ดคก."}], + "ดก.": [{ORTH: "ดก."}], + "ซก.": [{ORTH: "ซก."}], + "มก.": [{ORTH: "มก."}], + "ก.": [{ORTH: "ก."}], + "กก.": [{ORTH: "กก."}], # ปริมาตร / volume - "ฮล.": [{ORTH: "ฮล.", LEMMA: "เฮกโตลิตร"}], - "ดคล.": [{ORTH: "ดคล.", LEMMA: "เดคาลิตร"}], - "ดล.": [{ORTH: "ดล.", LEMMA: "เดซิลิตร"}], - "ซล.": [{ORTH: "ซล.", LEMMA: "เซนติลิตร"}], - "ล.": [{ORTH: "ล.", LEMMA: "ลิตร"}], - "กล.": [{ORTH: "กล.", LEMMA: "กิโลลิตร"}], - "ลบ.": [{ORTH: "ลบ.", LEMMA: "ลูกบาศก์"}], + "ฮล.": [{ORTH: "ฮล."}], + "ดคล.": [{ORTH: "ดคล."}], + "ดล.": [{ORTH: "ดล."}], + "ซล.": [{ORTH: "ซล."}], + "ล.": [{ORTH: "ล."}], + "กล.": [{ORTH: "กล."}], + "ลบ.": [{ORTH: "ลบ."}], # พื้นที่ / area - "ตร.ซม.": [{ORTH: "ตร.ซม.", 
LEMMA: "ตารางเซนติเมตร"}], - "ตร.ม.": [{ORTH: "ตร.ม.", LEMMA: "ตารางเมตร"}], - "ตร.ว.": [{ORTH: "ตร.ว.", LEMMA: "ตารางวา"}], - "ตร.กม.": [{ORTH: "ตร.กม.", LEMMA: "ตารางกิโลเมตร"}], + "ตร.ซม.": [{ORTH: "ตร.ซม."}], + "ตร.ม.": [{ORTH: "ตร.ม."}], + "ตร.ว.": [{ORTH: "ตร.ว."}], + "ตร.กม.": [{ORTH: "ตร.กม."}], # เดือน / month - "ม.ค.": [{ORTH: "ม.ค.", LEMMA: "มกราคม"}], - "ก.พ.": [{ORTH: "ก.พ.", LEMMA: "กุมภาพันธ์"}], - "มี.ค.": [{ORTH: "มี.ค.", LEMMA: "มีนาคม"}], - "เม.ย.": [{ORTH: "เม.ย.", LEMMA: "เมษายน"}], - "พ.ค.": [{ORTH: "พ.ค.", LEMMA: "พฤษภาคม"}], - "มิ.ย.": [{ORTH: "มิ.ย.", LEMMA: "มิถุนายน"}], - "ก.ค.": [{ORTH: "ก.ค.", LEMMA: "กรกฎาคม"}], - "ส.ค.": [{ORTH: "ส.ค.", LEMMA: "สิงหาคม"}], - "ก.ย.": [{ORTH: "ก.ย.", LEMMA: "กันยายน"}], - "ต.ค.": [{ORTH: "ต.ค.", LEMMA: "ตุลาคม"}], - "พ.ย.": [{ORTH: "พ.ย.", LEMMA: "พฤศจิกายน"}], - "ธ.ค.": [{ORTH: "ธ.ค.", LEMMA: "ธันวาคม"}], + "ม.ค.": [{ORTH: "ม.ค."}], + "ก.พ.": [{ORTH: "ก.พ."}], + "มี.ค.": [{ORTH: "มี.ค."}], + "เม.ย.": [{ORTH: "เม.ย."}], + "พ.ค.": [{ORTH: "พ.ค."}], + "มิ.ย.": [{ORTH: "มิ.ย."}], + "ก.ค.": [{ORTH: "ก.ค."}], + "ส.ค.": [{ORTH: "ส.ค."}], + "ก.ย.": [{ORTH: "ก.ย."}], + "ต.ค.": [{ORTH: "ต.ค."}], + "พ.ย.": [{ORTH: "พ.ย."}], + "ธ.ค.": [{ORTH: "ธ.ค."}], # เพศ / gender - "ช.": [{ORTH: "ช.", LEMMA: "ชาย"}], - "ญ.": [{ORTH: "ญ.", LEMMA: "หญิง"}], - "ด.ช.": [{ORTH: "ด.ช.", LEMMA: "เด็กชาย"}], - "ด.ญ.": [{ORTH: "ด.ญ.", LEMMA: "เด็กหญิง"}], + "ช.": [{ORTH: "ช."}], + "ญ.": [{ORTH: "ญ."}], + "ด.ช.": [{ORTH: "ด.ช."}], + "ด.ญ.": [{ORTH: "ด.ญ."}], # ที่อยู่ / address - "ถ.": [{ORTH: "ถ.", LEMMA: "ถนน"}], - "ต.": [{ORTH: "ต.", LEMMA: "ตำบล"}], - "อ.": [{ORTH: "อ.", LEMMA: "อำเภอ"}], - "จ.": [{ORTH: "จ.", LEMMA: "จังหวัด"}], + "ถ.": [{ORTH: "ถ."}], + "ต.": [{ORTH: "ต."}], + "อ.": [{ORTH: "อ."}], + "จ.": [{ORTH: "จ."}], # สรรพนาม / pronoun - "ข้าฯ": [{ORTH: "ข้าฯ", LEMMA: "ข้าพระพุทธเจ้า"}], - "ทูลเกล้าฯ": [{ORTH: "ทูลเกล้าฯ", LEMMA: "ทูลเกล้าทูลกระหม่อม"}], - "น้อมเกล้าฯ": [{ORTH: "น้อมเกล้าฯ", LEMMA: "น้อมเกล้าน้อมกระหม่อม"}], 
- "โปรดเกล้าฯ": [{ORTH: "โปรดเกล้าฯ", LEMMA: "โปรดเกล้าโปรดกระหม่อม"}], + "ข้าฯ": [{ORTH: "ข้าฯ"}], + "ทูลเกล้าฯ": [{ORTH: "ทูลเกล้าฯ"}], + "น้อมเกล้าฯ": [{ORTH: "น้อมเกล้าฯ"}], + "โปรดเกล้าฯ": [{ORTH: "โปรดเกล้าฯ"}], # การเมือง / politic - "ขจก.": [{ORTH: "ขจก.", LEMMA: "ขบวนการโจรก่อการร้าย"}], - "ขบด.": [{ORTH: "ขบด.", LEMMA: "ขบวนการแบ่งแยกดินแดน"}], - "นปช.": [{ORTH: "นปช.", LEMMA: "แนวร่วมประชาธิปไตยขับไล่เผด็จการ"}], - "ปชป.": [{ORTH: "ปชป.", LEMMA: "พรรคประชาธิปัตย์"}], - "ผกค.": [{ORTH: "ผกค.", LEMMA: "ผู้ก่อการร้ายคอมมิวนิสต์"}], - "พท.": [{ORTH: "พท.", LEMMA: "พรรคเพื่อไทย"}], - "พ.ร.ก.": [{ORTH: "พ.ร.ก.", LEMMA: "พระราชกำหนด"}], - "พ.ร.ฎ.": [{ORTH: "พ.ร.ฎ.", LEMMA: "พระราชกฤษฎีกา"}], - "พ.ร.บ.": [{ORTH: "พ.ร.บ.", LEMMA: "พระราชบัญญัติ"}], - "รธน.": [{ORTH: "รธน.", LEMMA: "รัฐธรรมนูญ"}], - "รบ.": [{ORTH: "รบ.", LEMMA: "รัฐบาล"}], - "รสช.": [{ORTH: "รสช.", LEMMA: "คณะรักษาความสงบเรียบร้อยแห่งชาติ"}], - "ส.ก.": [{ORTH: "ส.ก.", LEMMA: "สมาชิกสภากรุงเทพมหานคร"}], - "สจ.": [{ORTH: "สจ.", LEMMA: "สมาชิกสภาจังหวัด"}], - "สว.": [{ORTH: "สว.", LEMMA: "สมาชิกวุฒิสภา"}], - "ส.ส.": [{ORTH: "ส.ส.", LEMMA: "สมาชิกสภาผู้แทนราษฎร"}], + "ขจก.": [{ORTH: "ขจก."}], + "ขบด.": [{ORTH: "ขบด."}], + "นปช.": [{ORTH: "นปช."}], + "ปชป.": [{ORTH: "ปชป."}], + "ผกค.": [{ORTH: "ผกค."}], + "พท.": [{ORTH: "พท."}], + "พ.ร.ก.": [{ORTH: "พ.ร.ก."}], + "พ.ร.ฎ.": [{ORTH: "พ.ร.ฎ."}], + "พ.ร.บ.": [{ORTH: "พ.ร.บ."}], + "รธน.": [{ORTH: "รธน."}], + "รบ.": [{ORTH: "รบ."}], + "รสช.": [{ORTH: "รสช."}], + "ส.ก.": [{ORTH: "ส.ก."}], + "สจ.": [{ORTH: "สจ."}], + "สว.": [{ORTH: "สว."}], + "ส.ส.": [{ORTH: "ส.ส."}], # ทั่วไป / general - "ก.ข.ค.": [{ORTH: "ก.ข.ค.", LEMMA: "ก้างขวางคอ"}], - "กทม.": [{ORTH: "กทม.", LEMMA: "กรุงเทพมหานคร"}], - "กรุงเทพฯ": [{ORTH: "กรุงเทพฯ", LEMMA: "กรุงเทพมหานคร"}], - "ขรก.": [{ORTH: "ขรก.", LEMMA: "ข้าราชการ"}], - "ขส": [{ORTH: "ขส.", LEMMA: "ขนส่ง"}], - "ค.ร.น.": [{ORTH: "ค.ร.น.", LEMMA: "คูณร่วมน้อย"}], - "ค.ร.ม.": [{ORTH: "ค.ร.ม.", LEMMA: "คูณร่วมมาก"}], - "ง.ด.": [{ORTH: 
"ง.ด.", LEMMA: "เงินเดือน"}], - "งป.": [{ORTH: "งป.", LEMMA: "งบประมาณ"}], - "จก.": [{ORTH: "จก.", LEMMA: "จำกัด"}], - "จขกท.": [{ORTH: "จขกท.", LEMMA: "เจ้าของกระทู้"}], - "จนท.": [{ORTH: "จนท.", LEMMA: "เจ้าหน้าที่"}], - "จ.ป.ร.": [ - { - ORTH: "จ.ป.ร.", - LEMMA: "มหาจุฬาลงกรณ ปรมราชาธิราช (พระปรมาภิไธยในพระบาทสมเด็จพระจุลจอมเกล้าเจ้าอยู่หัว)", - } - ], - "จ.ม.": [{ORTH: "จ.ม.", LEMMA: "จดหมาย"}], - "จย.": [{ORTH: "จย.", LEMMA: "จักรยาน"}], - "จยย.": [{ORTH: "จยย.", LEMMA: "จักรยานยนต์"}], - "ตจว.": [{ORTH: "ตจว.", LEMMA: "ต่างจังหวัด"}], - "โทร.": [{ORTH: "โทร.", LEMMA: "โทรศัพท์"}], - "ธ.": [{ORTH: "ธ.", LEMMA: "ธนาคาร"}], - "น.ร.": [{ORTH: "น.ร.", LEMMA: "นักเรียน"}], - "น.ศ.": [{ORTH: "น.ศ.", LEMMA: "นักศึกษา"}], - "น.ส.": [{ORTH: "น.ส.", LEMMA: "นางสาว"}], - "น.ส.๓": [{ORTH: "น.ส.๓", LEMMA: "หนังสือรับรองการทำประโยชน์ในที่ดิน"}], - "น.ส.๓ ก.": [ - {ORTH: "น.ส.๓ ก", LEMMA: "หนังสือแสดงกรรมสิทธิ์ในที่ดิน (มีระวางกำหนด)"} - ], - "นสพ.": [{ORTH: "นสพ.", LEMMA: "หนังสือพิมพ์"}], - "บ.ก.": [{ORTH: "บ.ก.", LEMMA: "บรรณาธิการ"}], - "บจก.": [{ORTH: "บจก.", LEMMA: "บริษัทจำกัด"}], - "บงล.": [{ORTH: "บงล.", LEMMA: "บริษัทเงินทุนและหลักทรัพย์จำกัด"}], - "บบส.": [{ORTH: "บบส.", LEMMA: "บรรษัทบริหารสินทรัพย์สถาบันการเงิน"}], - "บมจ.": [{ORTH: "บมจ.", LEMMA: "บริษัทมหาชนจำกัด"}], - "บลจ.": [{ORTH: "บลจ.", LEMMA: "บริษัทหลักทรัพย์จัดการกองทุนรวมจำกัด"}], - "บ/ช": [{ORTH: "บ/ช", LEMMA: "บัญชี"}], - "บร.": [{ORTH: "บร.", LEMMA: "บรรณารักษ์"}], - "ปชช.": [{ORTH: "ปชช.", LEMMA: "ประชาชน"}], - "ปณ.": [{ORTH: "ปณ.", LEMMA: "ที่ทำการไปรษณีย์"}], - "ปณก.": [{ORTH: "ปณก.", LEMMA: "ที่ทำการไปรษณีย์กลาง"}], - "ปณส.": [{ORTH: "ปณส.", LEMMA: "ที่ทำการไปรษณีย์สาขา"}], - "ปธ.": [{ORTH: "ปธ.", LEMMA: "ประธาน"}], - "ปธน.": [{ORTH: "ปธน.", LEMMA: "ประธานาธิบดี"}], - "ปอ.": [{ORTH: "ปอ.", LEMMA: "รถยนต์โดยสารประจำทางปรับอากาศ"}], - "ปอ.พ.": [{ORTH: "ปอ.พ.", LEMMA: "รถยนต์โดยสารประจำทางปรับอากาศพิเศษ"}], - "พ.ก.ง.": [{ORTH: "พ.ก.ง.", LEMMA: "พัสดุเก็บเงินปลายทาง"}], - "พ.ก.ส.": [{ORTH: 
"พ.ก.ส.", LEMMA: "พนักงานเก็บค่าโดยสาร"}], - "พขร.": [{ORTH: "พขร.", LEMMA: "พนักงานขับรถ"}], - "ภ.ง.ด.": [{ORTH: "ภ.ง.ด.", LEMMA: "ภาษีเงินได้"}], - "ภ.ง.ด.๙": [{ORTH: "ภ.ง.ด.๙", LEMMA: "แบบแสดงรายการเสียภาษีเงินได้ของกรมสรรพากร"}], - "ภ.ป.ร.": [ - { - ORTH: "ภ.ป.ร.", - LEMMA: "ภูมิพลอดุยเดช ปรมราชาธิราช (พระปรมาภิไธยในพระบาทสมเด็จพระปรมินทรมหาภูมิพลอดุลยเดช)", - } - ], - "ภ.พ.": [{ORTH: "ภ.พ.", LEMMA: "ภาษีมูลค่าเพิ่ม"}], - "ร.": [{ORTH: "ร.", LEMMA: "รัชกาล"}], - "ร.ง.": [{ORTH: "ร.ง.", LEMMA: "โรงงาน"}], - "ร.ด.": [{ORTH: "ร.ด.", LEMMA: "รักษาดินแดน"}], - "รปภ.": [{ORTH: "รปภ.", LEMMA: "รักษาความปลอดภัย"}], - "รพ.": [{ORTH: "รพ.", LEMMA: "โรงพยาบาล"}], - "ร.พ.": [{ORTH: "ร.พ.", LEMMA: "โรงพิมพ์"}], - "รร.": [{ORTH: "รร.", LEMMA: "โรงเรียน,โรงแรม"}], - "รสก.": [{ORTH: "รสก.", LEMMA: "รัฐวิสาหกิจ"}], - "ส.ค.ส.": [{ORTH: "ส.ค.ส.", LEMMA: "ส่งความสุขปีใหม่"}], - "สต.": [{ORTH: "สต.", LEMMA: "สตางค์"}], - "สน.": [{ORTH: "สน.", LEMMA: "สถานีตำรวจ"}], - "สนข.": [{ORTH: "สนข.", LEMMA: "สำนักงานเขต"}], - "สนง.": [{ORTH: "สนง.", LEMMA: "สำนักงาน"}], - "สนญ.": [{ORTH: "สนญ.", LEMMA: "สำนักงานใหญ่"}], - "ส.ป.ช.": [{ORTH: "ส.ป.ช.", LEMMA: "สร้างเสริมประสบการณ์ชีวิต"}], - "สภ.": [{ORTH: "สภ.", LEMMA: "สถานีตำรวจภูธร"}], - "ส.ล.น.": [{ORTH: "ส.ล.น.", LEMMA: "สร้างเสริมลักษณะนิสัย"}], - "สวญ.": [{ORTH: "สวญ.", LEMMA: "สารวัตรใหญ่"}], - "สวป.": [{ORTH: "สวป.", LEMMA: "สารวัตรป้องกันปราบปราม"}], - "สว.สส.": [{ORTH: "สว.สส.", LEMMA: "สารวัตรสืบสวน"}], - "ส.ห.": [{ORTH: "ส.ห.", LEMMA: "สารวัตรทหาร"}], - "สอ.": [{ORTH: "สอ.", LEMMA: "สถานีอนามัย"}], - "สอท.": [{ORTH: "สอท.", LEMMA: "สถานเอกอัครราชทูต"}], - "เสธ.": [{ORTH: "เสธ.", LEMMA: "เสนาธิการ"}], - "หจก.": [{ORTH: "หจก.", LEMMA: "ห้างหุ้นส่วนจำกัด"}], - "ห.ร.ม.": [{ORTH: "ห.ร.ม.", LEMMA: "ตัวหารร่วมมาก"}], + "ก.ข.ค.": [{ORTH: "ก.ข.ค."}], + "กทม.": [{ORTH: "กทม."}], + "กรุงเทพฯ": [{ORTH: "กรุงเทพฯ"}], + "ขรก.": [{ORTH: "ขรก."}], + "ขส": [{ORTH: "ขส."}], + "ค.ร.น.": [{ORTH: "ค.ร.น."}], + "ค.ร.ม.": [{ORTH: "ค.ร.ม."}], + "ง.ด.": 
[{ORTH: "ง.ด."}], + "งป.": [{ORTH: "งป."}], + "จก.": [{ORTH: "จก."}], + "จขกท.": [{ORTH: "จขกท."}], + "จนท.": [{ORTH: "จนท."}], + "จ.ป.ร.": [{ORTH: "จ.ป.ร."}], + "จ.ม.": [{ORTH: "จ.ม."}], + "จย.": [{ORTH: "จย."}], + "จยย.": [{ORTH: "จยย."}], + "ตจว.": [{ORTH: "ตจว."}], + "โทร.": [{ORTH: "โทร."}], + "ธ.": [{ORTH: "ธ."}], + "น.ร.": [{ORTH: "น.ร."}], + "น.ศ.": [{ORTH: "น.ศ."}], + "น.ส.": [{ORTH: "น.ส."}], + "น.ส.๓": [{ORTH: "น.ส.๓"}], + "น.ส.๓ ก.": [{ORTH: "น.ส.๓ ก"}], + "นสพ.": [{ORTH: "นสพ."}], + "บ.ก.": [{ORTH: "บ.ก."}], + "บจก.": [{ORTH: "บจก."}], + "บงล.": [{ORTH: "บงล."}], + "บบส.": [{ORTH: "บบส."}], + "บมจ.": [{ORTH: "บมจ."}], + "บลจ.": [{ORTH: "บลจ."}], + "บ/ช": [{ORTH: "บ/ช"}], + "บร.": [{ORTH: "บร."}], + "ปชช.": [{ORTH: "ปชช."}], + "ปณ.": [{ORTH: "ปณ."}], + "ปณก.": [{ORTH: "ปณก."}], + "ปณส.": [{ORTH: "ปณส."}], + "ปธ.": [{ORTH: "ปธ."}], + "ปธน.": [{ORTH: "ปธน."}], + "ปอ.": [{ORTH: "ปอ."}], + "ปอ.พ.": [{ORTH: "ปอ.พ."}], + "พ.ก.ง.": [{ORTH: "พ.ก.ง."}], + "พ.ก.ส.": [{ORTH: "พ.ก.ส."}], + "พขร.": [{ORTH: "พขร."}], + "ภ.ง.ด.": [{ORTH: "ภ.ง.ด."}], + "ภ.ง.ด.๙": [{ORTH: "ภ.ง.ด.๙"}], + "ภ.ป.ร.": [{ORTH: "ภ.ป.ร."}], + "ภ.พ.": [{ORTH: "ภ.พ."}], + "ร.": [{ORTH: "ร."}], + "ร.ง.": [{ORTH: "ร.ง."}], + "ร.ด.": [{ORTH: "ร.ด."}], + "รปภ.": [{ORTH: "รปภ."}], + "รพ.": [{ORTH: "รพ."}], + "ร.พ.": [{ORTH: "ร.พ."}], + "รร.": [{ORTH: "รร."}], + "รสก.": [{ORTH: "รสก."}], + "ส.ค.ส.": [{ORTH: "ส.ค.ส."}], + "สต.": [{ORTH: "สต."}], + "สน.": [{ORTH: "สน."}], + "สนข.": [{ORTH: "สนข."}], + "สนง.": [{ORTH: "สนง."}], + "สนญ.": [{ORTH: "สนญ."}], + "ส.ป.ช.": [{ORTH: "ส.ป.ช."}], + "สภ.": [{ORTH: "สภ."}], + "ส.ล.น.": [{ORTH: "ส.ล.น."}], + "สวญ.": [{ORTH: "สวญ."}], + "สวป.": [{ORTH: "สวป."}], + "สว.สส.": [{ORTH: "สว.สส."}], + "ส.ห.": [{ORTH: "ส.ห."}], + "สอ.": [{ORTH: "สอ."}], + "สอท.": [{ORTH: "สอท."}], + "เสธ.": [{ORTH: "เสธ."}], + "หจก.": [{ORTH: "หจก."}], + "ห.ร.ม.": [{ORTH: "ห.ร.ม."}], } diff --git a/spacy/lang/tl/tokenizer_exceptions.py b/spacy/lang/tl/tokenizer_exceptions.py index 
f81d35f20..51ad12d9f 100644 --- a/spacy/lang/tl/tokenizer_exceptions.py +++ b/spacy/lang/tl/tokenizer_exceptions.py @@ -1,18 +1,18 @@ from ..tokenizer_exceptions import BASE_EXCEPTIONS -from ...symbols import ORTH, LEMMA +from ...symbols import ORTH, NORM from ...util import update_exc _exc = { - "tayo'y": [{ORTH: "tayo", LEMMA: "tayo"}, {ORTH: "'y", LEMMA: "ay"}], - "isa'y": [{ORTH: "isa", LEMMA: "isa"}, {ORTH: "'y", LEMMA: "ay"}], - "baya'y": [{ORTH: "baya", LEMMA: "bayan"}, {ORTH: "'y", LEMMA: "ay"}], - "sa'yo": [{ORTH: "sa", LEMMA: "sa"}, {ORTH: "'yo", LEMMA: "iyo"}], - "ano'ng": [{ORTH: "ano", LEMMA: "ano"}, {ORTH: "'ng", LEMMA: "ang"}], - "siya'y": [{ORTH: "siya", LEMMA: "siya"}, {ORTH: "'y", LEMMA: "ay"}], - "nawa'y": [{ORTH: "nawa", LEMMA: "nawa"}, {ORTH: "'y", LEMMA: "ay"}], - "papa'no": [{ORTH: "papa'no", LEMMA: "papaano"}], - "'di": [{ORTH: "'di", LEMMA: "hindi"}], + "tayo'y": [{ORTH: "tayo"}, {ORTH: "'y", NORM: "ay"}], + "isa'y": [{ORTH: "isa"}, {ORTH: "'y", NORM: "ay"}], + "baya'y": [{ORTH: "baya"}, {ORTH: "'y", NORM: "ay"}], + "sa'yo": [{ORTH: "sa"}, {ORTH: "'yo", NORM: "iyo"}], + "ano'ng": [{ORTH: "ano"}, {ORTH: "'ng", NORM: "ang"}], + "siya'y": [{ORTH: "siya"}, {ORTH: "'y", NORM: "ay"}], + "nawa'y": [{ORTH: "nawa"}, {ORTH: "'y", NORM: "ay"}], + "papa'no": [{ORTH: "papa'no", NORM: "papaano"}], + "'di": [{ORTH: "'di", NORM: "hindi"}], } diff --git a/spacy/lang/tokenizer_exceptions.py b/spacy/lang/tokenizer_exceptions.py index 45391332e..2532ae104 100644 --- a/spacy/lang/tokenizer_exceptions.py +++ b/spacy/lang/tokenizer_exceptions.py @@ -1,7 +1,7 @@ import re from .char_classes import ALPHA_LOWER -from ..symbols import ORTH, POS, TAG, LEMMA, SPACE +from ..symbols import ORTH, NORM # URL validation regex courtesy of: https://mathiasbynens.be/demo/url-regex @@ -62,13 +62,13 @@ BASE_EXCEPTIONS = {} for exc_data in [ - {ORTH: " ", POS: SPACE, TAG: "_SP"}, - {ORTH: "\t", POS: SPACE, TAG: "_SP"}, - {ORTH: "\\t", POS: SPACE, TAG: "_SP"}, - {ORTH: "\n", POS: 
SPACE, TAG: "_SP"}, - {ORTH: "\\n", POS: SPACE, TAG: "_SP"}, + {ORTH: " "}, + {ORTH: "\t"}, + {ORTH: "\\t"}, + {ORTH: "\n"}, + {ORTH: "\\n"}, {ORTH: "\u2014"}, - {ORTH: "\u00a0", POS: SPACE, LEMMA: " ", TAG: "_SP"}, + {ORTH: "\u00a0", NORM: " "}, ]: BASE_EXCEPTIONS[exc_data[ORTH]] = [exc_data] diff --git a/spacy/lang/tt/tokenizer_exceptions.py b/spacy/lang/tt/tokenizer_exceptions.py index 4ee6b6cd2..3b8cc86b5 100644 --- a/spacy/lang/tt/tokenizer_exceptions.py +++ b/spacy/lang/tt/tokenizer_exceptions.py @@ -1,5 +1,5 @@ from ..tokenizer_exceptions import BASE_EXCEPTIONS -from ...symbols import ORTH, LEMMA, NORM +from ...symbols import ORTH, NORM from ...util import update_exc @@ -7,35 +7,35 @@ _exc = {} _abbrev_exc = [ # Weekdays abbreviations - {ORTH: "дш", LEMMA: "дүшәмбе"}, - {ORTH: "сш", LEMMA: "сишәмбе"}, - {ORTH: "чш", LEMMA: "чәршәмбе"}, - {ORTH: "пш", LEMMA: "пәнҗешәмбе"}, - {ORTH: "җм", LEMMA: "җомга"}, - {ORTH: "шб", LEMMA: "шимбә"}, - {ORTH: "яш", LEMMA: "якшәмбе"}, + {ORTH: "дш", NORM: "дүшәмбе"}, + {ORTH: "сш", NORM: "сишәмбе"}, + {ORTH: "чш", NORM: "чәршәмбе"}, + {ORTH: "пш", NORM: "пәнҗешәмбе"}, + {ORTH: "җм", NORM: "җомга"}, + {ORTH: "шб", NORM: "шимбә"}, + {ORTH: "яш", NORM: "якшәмбе"}, # Months abbreviations - {ORTH: "гый", LEMMA: "гыйнвар"}, - {ORTH: "фев", LEMMA: "февраль"}, - {ORTH: "мар", LEMMA: "март"}, - {ORTH: "мар", LEMMA: "март"}, - {ORTH: "апр", LEMMA: "апрель"}, - {ORTH: "июн", LEMMA: "июнь"}, - {ORTH: "июл", LEMMA: "июль"}, - {ORTH: "авг", LEMMA: "август"}, - {ORTH: "сен", LEMMA: "сентябрь"}, - {ORTH: "окт", LEMMA: "октябрь"}, - {ORTH: "ноя", LEMMA: "ноябрь"}, - {ORTH: "дек", LEMMA: "декабрь"}, + {ORTH: "гый", NORM: "гыйнвар"}, + {ORTH: "фев", NORM: "февраль"}, + {ORTH: "мар", NORM: "март"}, + {ORTH: "мар", NORM: "март"}, + {ORTH: "апр", NORM: "апрель"}, + {ORTH: "июн", NORM: "июнь"}, + {ORTH: "июл", NORM: "июль"}, + {ORTH: "авг", NORM: "август"}, + {ORTH: "сен", NORM: "сентябрь"}, + {ORTH: "окт", NORM: "октябрь"}, + {ORTH: "ноя", NORM: 
"ноябрь"}, + {ORTH: "дек", NORM: "декабрь"}, # Number abbreviations - {ORTH: "млрд", LEMMA: "миллиард"}, - {ORTH: "млн", LEMMA: "миллион"}, + {ORTH: "млрд", NORM: "миллиард"}, + {ORTH: "млн", NORM: "миллион"}, ] for abbr in _abbrev_exc: for orth in (abbr[ORTH], abbr[ORTH].capitalize(), abbr[ORTH].upper()): - _exc[orth] = [{ORTH: orth, LEMMA: abbr[LEMMA], NORM: abbr[LEMMA]}] - _exc[orth + "."] = [{ORTH: orth + ".", LEMMA: abbr[LEMMA], NORM: abbr[LEMMA]}] + _exc[orth] = [{ORTH: orth, NORM: abbr[NORM]}] + _exc[orth + "."] = [{ORTH: orth + ".", NORM: abbr[NORM]}] for exc_data in [ # "etc." abbreviations {ORTH: "һ.б.ш.", NORM: "һәм башка шундыйлар"}, @@ -43,7 +43,6 @@ for exc_data in [ # "etc." abbreviations {ORTH: "б.э.к.", NORM: "безнең эрага кадәр"}, {ORTH: "б.э.", NORM: "безнең эра"}, ]: - exc_data[LEMMA] = exc_data[NORM] _exc[exc_data[ORTH]] = [exc_data] TOKENIZER_EXCEPTIONS = update_exc(BASE_EXCEPTIONS, _exc) diff --git a/spacy/lang/uk/tokenizer_exceptions.py b/spacy/lang/uk/tokenizer_exceptions.py index 8ae82a48c..94016fd52 100644 --- a/spacy/lang/uk/tokenizer_exceptions.py +++ b/spacy/lang/uk/tokenizer_exceptions.py @@ -1,24 +1,24 @@ from ..tokenizer_exceptions import BASE_EXCEPTIONS -from ...symbols import ORTH, LEMMA, POS, NORM, NOUN +from ...symbols import ORTH, NORM from ...util import update_exc _exc = {} for exc_data in [ - {ORTH: "вул.", LEMMA: "вулиця", NORM: "вулиця", POS: NOUN}, - {ORTH: "ім.", LEMMA: "ім'я", NORM: "імені", POS: NOUN}, - {ORTH: "просп.", LEMMA: "проспект", NORM: "проспект", POS: NOUN}, - {ORTH: "бул.", LEMMA: "бульвар", NORM: "бульвар", POS: NOUN}, - {ORTH: "пров.", LEMMA: "провулок", NORM: "провулок", POS: NOUN}, - {ORTH: "пл.", LEMMA: "площа", NORM: "площа", POS: NOUN}, - {ORTH: "г.", LEMMA: "гора", NORM: "гора", POS: NOUN}, - {ORTH: "п.", LEMMA: "пан", NORM: "пан", POS: NOUN}, - {ORTH: "м.", LEMMA: "місто", NORM: "місто", POS: NOUN}, - {ORTH: "проф.", LEMMA: "професор", NORM: "професор", POS: NOUN}, - {ORTH: "акад.", LEMMA: 
"академік", NORM: "академік", POS: NOUN}, - {ORTH: "доц.", LEMMA: "доцент", NORM: "доцент", POS: NOUN}, - {ORTH: "оз.", LEMMA: "озеро", NORM: "озеро", POS: NOUN}, + {ORTH: "вул.", NORM: "вулиця"}, + {ORTH: "ім.", NORM: "імені"}, + {ORTH: "просп.", NORM: "проспект"}, + {ORTH: "бул.", NORM: "бульвар"}, + {ORTH: "пров.", NORM: "провулок"}, + {ORTH: "пл.", NORM: "площа"}, + {ORTH: "г.", NORM: "гора"}, + {ORTH: "п.", NORM: "пан"}, + {ORTH: "м.", NORM: "місто"}, + {ORTH: "проф.", NORM: "професор"}, + {ORTH: "акад.", NORM: "академік"}, + {ORTH: "доц.", NORM: "доцент"}, + {ORTH: "оз.", NORM: "озеро"}, ]: _exc[exc_data[ORTH]] = [exc_data] diff --git a/spacy/tests/lang/ar/test_exceptions.py b/spacy/tests/lang/ar/test_exceptions.py index 125220caf..0129c3a19 100644 --- a/spacy/tests/lang/ar/test_exceptions.py +++ b/spacy/tests/lang/ar/test_exceptions.py @@ -12,7 +12,6 @@ def test_ar_tokenizer_handles_exc_in_text(ar_tokenizer): tokens = ar_tokenizer(text) assert len(tokens) == 7 assert tokens[6].text == "ق.م" - assert tokens[6].lemma_ == "قبل الميلاد" def test_ar_tokenizer_handles_exc_in_text_2(ar_tokenizer): diff --git a/spacy/tests/lang/ca/test_exception.py b/spacy/tests/lang/ca/test_exception.py index 71098f094..cfb574b63 100644 --- a/spacy/tests/lang/ca/test_exception.py +++ b/spacy/tests/lang/ca/test_exception.py @@ -8,7 +8,6 @@ import pytest def test_ca_tokenizer_handles_abbr(ca_tokenizer, text, lemma): tokens = ca_tokenizer(text) assert len(tokens) == 1 - assert tokens[0].lemma_ == lemma def test_ca_tokenizer_handles_exc_in_text(ca_tokenizer): @@ -16,4 +15,3 @@ def test_ca_tokenizer_handles_exc_in_text(ca_tokenizer): tokens = ca_tokenizer(text) assert len(tokens) == 15 assert tokens[7].text == "aprox." 
- assert tokens[7].lemma_ == "aproximadament" diff --git a/spacy/tests/lang/de/test_exceptions.py b/spacy/tests/lang/de/test_exceptions.py index a1bbaf58b..d51c33992 100644 --- a/spacy/tests/lang/de/test_exceptions.py +++ b/spacy/tests/lang/de/test_exceptions.py @@ -18,4 +18,3 @@ def test_de_tokenizer_handles_exc_in_text(de_tokenizer): tokens = de_tokenizer(text) assert len(tokens) == 6 assert tokens[2].text == "z.Zt." - assert tokens[2].lemma_ == "zur Zeit" diff --git a/spacy/tests/lang/en/test_exceptions.py b/spacy/tests/lang/en/test_exceptions.py index f72dfbf25..1b56a3b0f 100644 --- a/spacy/tests/lang/en/test_exceptions.py +++ b/spacy/tests/lang/en/test_exceptions.py @@ -49,7 +49,6 @@ def test_en_tokenizer_handles_ll_contraction(en_tokenizer, text): assert len(tokens) == 2 assert tokens[0].text == text.split("'")[0] assert tokens[1].text == "'ll" - assert tokens[1].lemma_ == "will" @pytest.mark.parametrize( @@ -104,7 +103,6 @@ def test_en_tokenizer_handles_exc_in_text(en_tokenizer): def test_en_tokenizer_handles_times(en_tokenizer, text): tokens = en_tokenizer(text) assert len(tokens) == 2 - assert tokens[1].lemma_ in ["a.m.", "p.m."] @pytest.mark.parametrize( diff --git a/spacy/tests/lang/es/test_exception.py b/spacy/tests/lang/es/test_exception.py index 90d897a4c..07df5d69e 100644 --- a/spacy/tests/lang/es/test_exception.py +++ b/spacy/tests/lang/es/test_exception.py @@ -13,7 +13,6 @@ import pytest def test_es_tokenizer_handles_abbr(es_tokenizer, text, lemma): tokens = es_tokenizer(text) assert len(tokens) == 1 - assert tokens[0].lemma_ == lemma def test_es_tokenizer_handles_exc_in_text(es_tokenizer): @@ -21,4 +20,3 @@ def test_es_tokenizer_handles_exc_in_text(es_tokenizer): tokens = es_tokenizer(text) assert len(tokens) == 7 assert tokens[4].text == "aprox." 
- assert tokens[4].lemma_ == "aproximadamente" diff --git a/spacy/tests/lang/fr/test_exceptions.py b/spacy/tests/lang/fr/test_exceptions.py index 4b7ccad65..77e72a76b 100644 --- a/spacy/tests/lang/fr/test_exceptions.py +++ b/spacy/tests/lang/fr/test_exceptions.py @@ -37,19 +37,11 @@ def test_fr_tokenizer_infix_exceptions(fr_tokenizer, text): @pytest.mark.parametrize( - "text,lemma", - [ - ("janv.", "janvier"), - ("juill.", "juillet"), - ("Dr.", "docteur"), - ("av.", "avant"), - ("sept.", "septembre"), - ], + "text", ["janv.", "juill.", "Dr.", "av.", "sept."], ) -def test_fr_tokenizer_handles_abbr(fr_tokenizer, text, lemma): +def test_fr_tokenizer_handles_abbr(fr_tokenizer, text): tokens = fr_tokenizer(text) assert len(tokens) == 1 - assert tokens[0].lemma_ == lemma def test_fr_tokenizer_handles_exc_in_text(fr_tokenizer): @@ -57,7 +49,6 @@ def test_fr_tokenizer_handles_exc_in_text(fr_tokenizer): tokens = fr_tokenizer(text) assert len(tokens) == 10 assert tokens[6].text == "janv." - assert tokens[6].lemma_ == "janvier" assert tokens[8].text == "prud’hommes" @@ -74,11 +65,8 @@ def test_fr_tokenizer_handles_title(fr_tokenizer): tokens = fr_tokenizer(text) assert len(tokens) == 6 assert tokens[0].text == "N'" - assert tokens[0].lemma_ == "ne" assert tokens[1].text == "est" - assert tokens[1].lemma_ == "être" assert tokens[2].text == "-ce" - assert tokens[2].lemma_ == "ce" def test_fr_tokenizer_handles_title_2(fr_tokenizer): @@ -86,9 +74,7 @@ def test_fr_tokenizer_handles_title_2(fr_tokenizer): tokens = fr_tokenizer(text) assert len(tokens) == 5 assert tokens[0].text == "Est" - assert tokens[0].lemma_ == "être" assert tokens[1].text == "-ce" - assert tokens[1].lemma_ == "ce" def test_fr_tokenizer_handles_title_3(fr_tokenizer): @@ -96,4 +82,3 @@ def test_fr_tokenizer_handles_title_3(fr_tokenizer): tokens = fr_tokenizer(text) assert len(tokens) == 7 assert tokens[0].text == "Qu'" - assert tokens[0].lemma_ == "que" diff --git a/spacy/tests/lang/lb/test_exceptions.py 
b/spacy/tests/lang/lb/test_exceptions.py index d941a854b..fc4b4fa7b 100644 --- a/spacy/tests/lang/lb/test_exceptions.py +++ b/spacy/tests/lang/lb/test_exceptions.py @@ -18,4 +18,3 @@ def test_lb_tokenizer_handles_exc_in_text(lb_tokenizer): tokens = lb_tokenizer(text) assert len(tokens) == 9 assert tokens[1].text == "'t" - assert tokens[1].lemma_ == "et" diff --git a/spacy/tests/regression/test_issue1501-2000.py b/spacy/tests/regression/test_issue1501-2000.py index 9d2ef999b..b5d586ec6 100644 --- a/spacy/tests/regression/test_issue1501-2000.py +++ b/spacy/tests/regression/test_issue1501-2000.py @@ -157,8 +157,6 @@ def test_issue1758(en_tokenizer): """Test that "would've" is handled by the English tokenizer exceptions.""" tokens = en_tokenizer("would've") assert len(tokens) == 2 - assert tokens[0].tag_ == "MD" - assert tokens[1].lemma_ == "have" def test_issue1773(en_tokenizer): diff --git a/spacy/tests/regression/test_issue2501-3000.py b/spacy/tests/regression/test_issue2501-3000.py index 7917157aa..ac0867189 100644 --- a/spacy/tests/regression/test_issue2501-3000.py +++ b/spacy/tests/regression/test_issue2501-3000.py @@ -166,7 +166,6 @@ def test_issue2822(it_tokenizer): assert doc[0].text == "Vuoi" assert doc[1].text == "un" assert doc[2].text == "po'" - assert doc[2].lemma_ == "poco" assert doc[3].text == "di" assert doc[4].text == "zucchero" assert doc[5].text == "?"