mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-14 13:47:13 +03:00
19 lines
432 B
Python
19 lines
432 B
Python
# coding: utf8
from __future__ import unicode_literals

# Here we only want to include the absolute most common words. Otherwise,
# this list would get impossibly long for German – especially considering the
# old vs. new spelling rules, and all possible cases.

# Spelling variant (as written in lower case) -> normalised form.
_exc = {
    "daß": "dass",
}


# Lookup table built from _exc: every entry is registered twice, once exactly
# as written and once in its str.title() form (e.g. "daß" and "Daß"), with
# both keys mapping to the same normalised form.
NORM_EXCEPTIONS = {}

for string, norm in _exc.items():
    NORM_EXCEPTIONS[string] = norm
    NORM_EXCEPTIONS[string.title()] = norm