Merge pull request #10185 from martinjack/master

Update Ukrainian tokenizer_exceptions
This commit is contained in:
Sofie Van Landeghem 2022-02-06 16:30:03 +01:00 committed by GitHub
commit bc12ecb870
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -6,19 +6,30 @@ from ...util import update_exc
_exc = {} _exc = {}
for exc_data in [ for exc_data in [
{ORTH: "обл.", NORM: "область"},
{ORTH: "р-н.", NORM: "район"},
{ORTH: "р-н", NORM: "район"},
{ORTH: "м.", NORM: "місто"},
{ORTH: "вул.", NORM: "вулиця"}, {ORTH: "вул.", NORM: "вулиця"},
{ORTH: "ім.", NORM: "імені"},
{ORTH: "просп.", NORM: "проспект"}, {ORTH: "просп.", NORM: "проспект"},
{ORTH: "пр-кт", NORM: "проспект"},
{ORTH: "бул.", NORM: "бульвар"}, {ORTH: "бул.", NORM: "бульвар"},
{ORTH: "пров.", NORM: "провулок"}, {ORTH: "пров.", NORM: "провулок"},
{ORTH: "пл.", NORM: "площа"}, {ORTH: "пл.", NORM: "площа"},
{ORTH: "майд.", NORM: "майдан"},
{ORTH: "мкр.", NORM: "мікрорайон"},
{ORTH: "ст.", NORM: "станція"},
{ORTH: "ж/м", NORM: "житловий масив"},
{ORTH: "наб.", NORM: "набережна"},
{ORTH: "в/ч", NORM: "військова частина"},
{ORTH: "в/м", NORM: "військове містечко"},
{ORTH: "оз.", NORM: "озеро"},
{ORTH: "ім.", NORM: "імені"},
{ORTH: "г.", NORM: "гора"}, {ORTH: "г.", NORM: "гора"},
{ORTH: "п.", NORM: "пан"}, {ORTH: "п.", NORM: "пан"},
{ORTH: "м.", NORM: "місто"},
{ORTH: "проф.", NORM: "професор"}, {ORTH: "проф.", NORM: "професор"},
{ORTH: "акад.", NORM: "академік"}, {ORTH: "акад.", NORM: "академік"},
{ORTH: "доц.", NORM: "доцент"}, {ORTH: "доц.", NORM: "доцент"},
{ORTH: "оз.", NORM: "озеро"},
]: ]:
_exc[exc_data[ORTH]] = [exc_data] _exc[exc_data[ORTH]] = [exc_data]