diff --git a/spacy/lang/id/punctuation.py b/spacy/lang/id/punctuation.py index 85da34074..794d6a39e 100644 --- a/spacy/lang/id/punctuation.py +++ b/spacy/lang/id/punctuation.py @@ -27,7 +27,7 @@ LIST_CURRENCY = split_chars(_currency) _prefixes = TOKENIZER_PREFIXES + LIST_CURRENCY + [HTML_PREFIX] + ['[Kk]e-', '/', '—'] -_suffixes = TOKENIZER_SUFFIXES + [r'\-[Nn]ya', '—'] + [ +_suffixes = TOKENIZER_SUFFIXES + [r'\-[Nn]ya', '—', '-'] + [ r'(?<=[0-9])(?:{c})'.format(c=CURRENCY), r'(?<=[0-9])(?:{u})'.format(u=UNITS), r'(?<=[0-9])%',