Merge pull request #5185 from adrianeboyd/bugfix/de-punctuation-style

Improve German tokenizer settings style
This commit is contained in:
Ines Montani 2020-03-24 16:38:32 +01:00 committed by GitHub
commit 5f2afa0479
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -4,10 +4,10 @@ from __future__ import unicode_literals
from ..char_classes import LIST_ELLIPSES, LIST_ICONS, LIST_PUNCT, LIST_QUOTES
from ..char_classes import LIST_CURRENCY, CURRENCY, UNITS, PUNCT
from ..char_classes import CONCAT_QUOTES, ALPHA, ALPHA_LOWER, ALPHA_UPPER
from ..punctuation import _prefixes, _suffixes
from ..punctuation import TOKENIZER_PREFIXES as BASE_TOKENIZER_PREFIXES
# NOTE(review): the next two assignments appear to be the removed/added pair
# of this diff hunk (the +/- markers are not present in this view) -- confirm
# against the original commit before treating them as sequential statements.
#
# First form: prepend the double-backtick string to a list copy of the
# `_prefixes` name imported from ..punctuation.
_prefixes = ["``",] + list(_prefixes)
# Second form: prepend the same double-backtick string directly to
# BASE_TOKENIZER_PREFIXES (the alias given to ..punctuation.TOKENIZER_PREFIXES
# in the import above). Read top to bottom, this assignment rebinds
# `_prefixes` and supersedes the one before it.
_prefixes = ["``"] + BASE_TOKENIZER_PREFIXES
_suffixes = (
["''", "/"]