spaCy/spacy/lang/de/norm_exceptions.py

19 lines
432 B
Python
Raw Normal View History

2017-06-03 21:54:18 +03:00
# coding: utf8
from __future__ import unicode_literals
# Here we only want to include the absolute most common words. Otherwise,
# this list would get impossibly long for German especially considering the
# old vs. new spelling rules, and all possible cases.
# Base table: spelling variant -> normalized form. Keys are written once
# here; the title-cased variant of each key is derived in the loop below.
_exc = {
    "daß": "dass",
}

# Public lookup table. For every base entry, both the key as written and
# its str.title() form map to the same norm (e.g. "daß" and "Daß" -> "dass"),
# so sentence-initial occurrences are normalized as well.
NORM_EXCEPTIONS = {}
for string, norm in _exc.items():
    NORM_EXCEPTIONS[string] = norm
    NORM_EXCEPTIONS[string.title()] = norm