mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 17:36:30 +03:00
Split off period after two or more uppercase letters (fixes #483)
This commit is contained in:
parent
a89a6000e5
commit
eef94e3ee2
|
@@ -92,6 +92,7 @@ TOKENIZER_SUFFIXES = (
|
|||
r'(?<=[0-9])(?:{c})'.format(c=CURRENCY),
|
||||
r'(?<=[0-9])(?:{u})'.format(u=UNITS),
|
||||
r'(?<=[0-9{al}{p}(?:{q})])\.'.format(al=ALPHA_LOWER, p=r'%²\-\)\]\+', q=QUOTES),
|
||||
r'(?<=[{au}][{au}])\.'.format(au=ALPHA_UPPER),
|
||||
"'s", "'S", "’s", "’S"
|
||||
]
|
||||
)
|
||||
|
|
Loading…
Reference in New Issue
Block a user