mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-11 04:08:09 +03:00
20 lines
507 B
Python
20 lines
507 B
Python
|
# coding: utf8
|
||
|
from __future__ import unicode_literals
|
||
|
|
||
|
# TODO
|
||
|
# norm exceptions: find a way to deal with the zillions of spelling variants (vläicht = vlaicht, vleicht, viläicht, viläischt, etc.)
|
||
|
|
||
|
# here one could include the most common spelling mistakes
|
||
|
|
||
|
# Base table of norm exceptions: lower-case surface form -> normalised form.
# NOTE(review): the original literal listed the key "wgl." twice ("weg." and
# then "wegl."). A dict literal keeps only the last value for a repeated key,
# so "weg." was never effective; the shadowed entry has been dropped and the
# effective value "wegl." kept. Confirm which mapping was actually intended.
_exc = {
    "datt": "dass",
    "wgl.": "wegl.",
    "vläicht": "viläicht",
}


# Public lookup table: every entry of _exc is registered under its original
# spelling and under its title-cased spelling, both mapping to the same norm.
NORM_EXCEPTIONS = {}

for string, norm in _exc.items():
    NORM_EXCEPTIONS[string] = norm
    NORM_EXCEPTIONS[string.title()] = norm
|