from ...symbols import ORTH
from ...util import update_exc
from ..tokenizer_exceptions import BASE_EXCEPTIONS

_exc = {}

# Hyphenated contracted prepositions: keep each form as a single token,
# in both lowercase and capitalized variants.
for raw in [
    "a-e",
    "a-o",
    "a-i",
    "a-a",
    "co-a",
    "co-e",
    "co-i",
    "co-o",
    "da-a",
    "da-e",
    "da-i",
    "da-o",
    "pe-a",
    "pe-e",
    "pe-i",
    "pe-o",
]:
    for orth in [raw, raw.capitalize()]:
        _exc[orth] = [{ORTH: orth}]

# Prefix + prepositions with à (e.g. "sott'a-o"): split the elided prefix
# from the preposition, covering both straight and curly apostrophes.
for prep in [
    "a-a",
    "a-e",
    "a-o",
    "a-i",
]:
    for prefix in [
        "sott'",
        "sott’",
        "contr'",
        "contr’",
        "ch'",
        "ch’",
        "s'",
        "s’",
    ]:
        for prefix_orth in [prefix, prefix.capitalize()]:
            _exc[prefix_orth + prep] = [{ORTH: prefix_orth}, {ORTH: prep}]

TOKENIZER_EXCEPTIONS = update_exc(BASE_EXCEPTIONS, _exc)
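
# Usage sketch (an assumption, not part of this module): with a spaCy
# install that ships the Ligurian ("lij") language data, these exceptions
# should keep the elided prefix and the contracted preposition as separate,
# unsplit tokens instead of letting the infix rules break them on the
# apostrophe and hyphen:
#
#     import spacy
#
#     nlp = spacy.blank("lij")
#     doc = nlp("sott'a-o")
#     assert [t.text for t in doc] == ["sott'", "a-o"]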