mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-30 20:06:30 +03:00
18 lines
397 B
Python
18 lines
397 B
Python
# coding: utf8
|
||
from __future__ import unicode_literals
|
||
|
||
# Here we only want to include the absolute most common words. Otherwise,
|
||
# this list would get impossibly long for German – especially considering the
|
||
# old vs. new spelling rules, and all possible cases.
|
||
|
||
|
||
# Base table of norm exceptions: maps a spelling as written here to the
# normalised form it should be replaced with.
_exc = {
    "daß": "dass",
}


# Public lookup table, filled from _exc by the loop below.
NORM_EXCEPTIONS = {}

for string, norm in _exc.items():
    # Register the key exactly as written so the lowercase form is covered;
    # adding only the title-cased key would leave "daß" itself out.
    NORM_EXCEPTIONS[string] = norm
    # Also register the title-cased variant (e.g. "Daß").
    NORM_EXCEPTIONS[string.title()] = norm
|