2020-07-23 00:09:01 +03:00
|
|
|
from ...symbols import ORTH
|
2020-07-22 23:18:46 +03:00
|
|
|
from ...util import update_exc
|
2023-06-26 12:41:03 +03:00
|
|
|
from ..tokenizer_exceptions import BASE_EXCEPTIONS
|
2019-02-21 00:10:13 +03:00
|
|
|
|
2020-03-25 13:28:02 +03:00
|
|
|
# Exceptions that map one surface string to an explicit list of token
# attribute dicts. In every entry the ORTH values, concatenated in order,
# reproduce the key.
#
# First the regular family: an elided form immediately followed by the
# abbreviation "art." is split into the elided form and "art.".
_exc = {
    elided + "art.": [{ORTH: elided}, {ORTH: "art."}]
    for elided in ("all'", "dall'", "dell'", "L'", "l'", "nell'")
}

# "po'" stays a single token, apostrophe included.
_exc["po'"] = [{ORTH: "po'"}]

# "sett.." is split into "sett." followed by a separate ".".
_exc["sett.."] = [{ORTH: "sett."}, {ORTH: "."}]
|
|
|
|
|
|
|
|
# Strings registered as exceptions that consist of exactly one token: each
# one maps to a single attribute dict whose ORTH is the string itself.
# Order is kept as originally listed.
for orth in (
    "..", "....", "a.C.", "al.", "all-path", "art.", "Art.", "artt.",
    "att.", "avv.", "Avv.", "by-pass", "c.d.", "c/c", "C.so",
    "centro-sinistra", "check-up", "Civ.", "cm.", "Cod.", "col.", "Cost.",
    "d.C.", 'de"', "distr.", "E'", "ecc.", "e-mail", "e/o", "etc.", "Jr.",
    "n°", "nord-est", "pag.", "Proc.", "prof.", "sett.", "s.p.a.", "s.n.c",
    "s.r.l", "ss.", "St.", "tel.", "week-end",
):
    _exc[orth] = [{ORTH: orth}]
|
2019-02-21 00:10:13 +03:00
|
|
|
|
2020-07-22 23:18:46 +03:00
|
|
|
# Exported exception table: the result of passing the shared BASE_EXCEPTIONS
# and this module's _exc to update_exc.
# NOTE(review): presumably update_exc merges _exc on top of the base table;
# confirm against its definition in the util module.
TOKENIZER_EXCEPTIONS = update_exc(BASE_EXCEPTIONS, _exc)
|