# Suffix patterns, written as whitespace-separated regex fragments and split
# into a list of pattern strings (post-patch state of
# spacy/en/language_data.py).
#
# The three raw literals below are joined by implicit string concatenation
# *before* .split() runs, so every literal except the last must end with a
# space. Without that separator the last fragment of one literal fuses with
# the first fragment of the next (previously "’" and "\.\." merged into the
# single bogus pattern "’\.\.", dropping both intended suffixes).
TOKENIZER_SUFFIXES = (
    # Closing punctuation, quotes and possessive markers.
    r''', \" \) \] \} \* \! \? % \$ > : ; ' ” '' 's 'S ’s ’S ’ '''
    # Runs of two to four dots, then a single period that directly follows a
    # lowercase letter, digit, closing bracket, quote or percent sign.
    r'''\.\. \.\.\. \.\.\.\. (?<=[a-z0-9)\]”"'%\)])\. '''
    # "km" directly following a digit.
    r'''(?<=[0-9])km'''
).strip().split()