Improve German tokenizer settings style

This commit is contained in:
Adriane Boyd 2020-03-23 19:23:47 +01:00
parent 80e7e1347e
commit 2897a73559

View File

@@ -4,10 +4,10 @@ from __future__ import unicode_literals
from ..char_classes import LIST_ELLIPSES, LIST_ICONS, LIST_PUNCT, LIST_QUOTES
from ..char_classes import LIST_CURRENCY, CURRENCY, UNITS, PUNCT
from ..char_classes import CONCAT_QUOTES, ALPHA, ALPHA_LOWER, ALPHA_UPPER
from ..punctuation import _prefixes, _suffixes
from ..punctuation import TOKENIZER_PREFIXES as BASE_TOKENIZER_PREFIXES
# NOTE(review): the two assignments below look like the removed/added sides of
# a diff whose +/- markers were lost in extraction — confirm before treating
# them as consecutive statements of one module.
# First form: put the "``" prefix in front of a list copy of the imported
# `_prefixes`.
_prefixes = ["``",] + list(_prefixes)
# Second form: put the same "``" prefix in front of BASE_TOKENIZER_PREFIXES.
# If both lines execute, this later assignment is the value that remains.
# Presumably BASE_TOKENIZER_PREFIXES is already a list, since it is
# concatenated without a list() conversion — verify.
_prefixes = ["``"] + BASE_TOKENIZER_PREFIXES
_suffixes = (
["''", "/"]