Update Hungarian punctuation to remove empty string

Update Hungarian punctuation definitions so that `_units` does not match
an empty string.
This commit is contained in:
Adriane Boyd 2019-11-06 12:16:56 +01:00
parent 983c88d02e
commit f0a577f7a5

View File

@ -2,7 +2,8 @@
from __future__ import unicode_literals
from ..char_classes import LIST_PUNCT, LIST_ELLIPSES, LIST_QUOTES, CONCAT_QUOTES
from ..char_classes import CONCAT_ICONS, UNITS, ALPHA, ALPHA_LOWER, ALPHA_UPPER
from ..char_classes import CONCAT_ICONS, ALPHA, ALPHA_LOWER, ALPHA_UPPER
from ..char_classes import LIST_UNITS, merge_chars
# removing ° from the special icons to keep e.g. 99° as one token
@ -10,7 +11,8 @@ _concat_icons = CONCAT_ICONS.replace("\u00B0", "")
# Currency symbols as a raw string; `$` is backslash-escaped
# (presumably because this is embedded in a regex elsewhere -- confirm).
_currency = r"\$¢£€¥฿"
# CONCAT_QUOTES with the ASCII apostrophe (') removed.
_quotes = CONCAT_QUOTES.replace("'", "")
# NOTE(review): this view appears to show both the old and the new side of the
# change. The next line is the earlier definition, which deletes "%" from the
# already-merged UNITS string. Per the commit message, `_units` must not match
# an empty string; presumably the deletion left an empty alternative behind --
# confirm against char_classes.
_units = UNITS.replace("%", "")
# Replacement definition: drop the "%" entry from the list of units first ...
_list_units = [s for s in LIST_UNITS if s != "%"]
# ... then space-join the remaining entries and hand them to merge_chars
# (defined in char_classes, not visible here; presumably builds the combined
# units pattern -- confirm).
_units = merge_chars(" ".join(_list_units))
_prefixes = (
LIST_PUNCT