From 2897a73559ca1663d0e258604686e0134b9095d0 Mon Sep 17 00:00:00 2001
From: Adriane Boyd
Date: Mon, 23 Mar 2020 19:23:47 +0100
Subject: [PATCH] Improve German tokenizer settings style

---
 spacy/lang/de/punctuation.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/spacy/lang/de/punctuation.py b/spacy/lang/de/punctuation.py
index c376ce597..da6ab1d40 100644
--- a/spacy/lang/de/punctuation.py
+++ b/spacy/lang/de/punctuation.py
@@ -4,10 +4,10 @@ from __future__ import unicode_literals
 from ..char_classes import LIST_ELLIPSES, LIST_ICONS, LIST_PUNCT, LIST_QUOTES
 from ..char_classes import LIST_CURRENCY, CURRENCY, UNITS, PUNCT
 from ..char_classes import CONCAT_QUOTES, ALPHA, ALPHA_LOWER, ALPHA_UPPER
-from ..punctuation import _prefixes, _suffixes
+from ..punctuation import TOKENIZER_PREFIXES as BASE_TOKENIZER_PREFIXES
 
 
-_prefixes = ["``",] + list(_prefixes)
+_prefixes = ["``"] + BASE_TOKENIZER_PREFIXES
 
 _suffixes = (
     ["''", "/"]