Split off period after two or more uppercase letters (fixes #483)

This commit is contained in:
Ines Montani 2017-01-08 22:28:25 +01:00
parent a89a6000e5
commit eef94e3ee2

View File

@@ -92,6 +92,7 @@ TOKENIZER_SUFFIXES = (
r'(?<=[0-9])(?:{c})'.format(c=CURRENCY),
r'(?<=[0-9])(?:{u})'.format(u=UNITS),
r'(?<=[0-9{al}{p}(?:{q})])\.'.format(al=ALPHA_LOWER, p=r'%²\-\)\]\+', q=QUOTES),
r'(?<=[{au}][{au}])\.'.format(au=ALPHA_UPPER),
"'s", "'S", "s", "S"
]
)