spaCy/spacy/lang/lb/norm_exceptions.py

17 lines
475 B
Python
Raw Normal View History

# coding: utf8
from __future__ import unicode_literals
# TODO
# norm exceptions: find a way to deal with the zillions of spelling
# variants (vläicht = vlaicht, vleicht, viläicht, viläischt, etc. etc.)
# the most common spelling mistakes could also be included here
# Hand-maintained base table: surface spelling -> normalized form.
_exc = {"datt": "dass", "wgl.": "weg.", "vläicht": "viläicht"}

# Public lookup table. Every entry of ``_exc`` is registered twice: once
# under the spelling exactly as written above and once under its
# ``str.title()`` form, so a capitalized occurrence (e.g. "Datt") maps to
# the same norm as the lowercase one.
NORM_EXCEPTIONS = {}
for string, norm in _exc.items():
    NORM_EXCEPTIONS[string] = norm
    NORM_EXCEPTIONS[string.title()] = norm