diff --git a/spacy/language_data/punctuation.py b/spacy/language_data/punctuation.py index d8ed19ca1..8b92922f8 100644 --- a/spacy/language_data/punctuation.py +++ b/spacy/language_data/punctuation.py @@ -103,7 +103,7 @@ TOKENIZER_SUFFIXES = ( TOKENIZER_INFIXES = ( LIST_ELLIPSES + [ - r'(?<=[0-9])[+\-\*/^](?=[0-9])', + r'(?<=[0-9])[+\-\*^](?=[0-9-])', r'(?<=[{al}])\.(?=[{au}])'.format(al=ALPHA_LOWER, au=ALPHA_UPPER), r'(?<=[{a}]),(?=[{a}])'.format(a=ALPHA), r'(?<=[{a}])(?:{h})(?=[{a}])'.format(a=ALPHA, h=HYPHENS),