mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-11 12:18:04 +03:00
18 lines
397 B
Python
18 lines
397 B
Python
|
# coding: utf8
|
|||
|
from __future__ import unicode_literals
|
|||
|
|
|||
|
# Here we only want to include the absolute most common words. Otherwise,
|
|||
|
# this list would get impossibly long for German – especially considering the
|
|||
|
# old vs. new spelling rules, and all possible cases.
|
|||
|
|
|||
|
|
|||
|
# Base exception entries: surface form as listed -> normalised form.
# Only the listed casing is spelled out here; the title-cased variant is
# derived in the loop below.
_exc = {
    "daß": "dass",
}


# Public lookup table built from `_exc`.
NORM_EXCEPTIONS = {}

for string, norm in _exc.items():
    # Register the form exactly as listed *and* its title-cased variant, so
    # that both "daß" and "Daß" map to the same norm. Storing only the
    # title-cased key would leave the listed form itself without an entry.
    NORM_EXCEPTIONS[string] = norm
    NORM_EXCEPTIONS[string.title()] = norm
|