2024-03-15 13:55:27 +03:00
|
|
|
from ...util import update_exc
|
|
|
|
from ...symbols import NORM, ORTH
|
|
|
|
from ..tokenizer_exceptions import BASE_EXCEPTIONS
|
2017-02-10 15:17:05 +03:00
|
|
|
|
2024-03-15 10:45:23 +03:00
|
|
|
|
2024-03-15 13:55:27 +03:00
|
|
|
# Language-specific tokenizer exceptions for common abbreviations.
# Each abbreviation maps to a single-entry list whose ORTH repeats the key
# verbatim and whose NORM holds the expanded word.
_exc = {
    abbreviation: [{ORTH: abbreviation, NORM: expansion}]
    for abbreviation, expansion in (
        ("St", "Saint"),
        ("Ste", "Sainte"),
        ("Mme", "Madame"),
        ("Mr", "Monsieur"),
        ("M.", "Monsieur"),
        ("Mlle", "Mademoiselle"),
        ("Dr", "Docteur"),
        ("Dresse", "Doctoresse"),
        ("Drsse", "Doctoresse"),
        ("etc", "etcaetera"),
    )
}
|
2024-03-15 10:45:23 +03:00
|
|
|
|
2024-03-15 13:55:27 +03:00
|
|
|
# Module-level table built by passing the shared BASE_EXCEPTIONS and the
# local `_exc` entries to `update_exc`.
# NOTE(review): `update_exc` is defined in `...util`; presumably it merges the
# two mappings into a new dict -- confirm against its implementation.
TOKENIZER_EXCEPTIONS = update_exc(BASE_EXCEPTIONS, _exc)
|