mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-13 10:46:29 +03:00
Don't split tokens with digits and "/" infixes (resolves #740)
This commit is contained in:
parent
e9e99a5670
commit
0894b8c0ef
|
@@ -103,7 +103,7 @@ TOKENIZER_SUFFIXES = (
|
|||
TOKENIZER_INFIXES = (
|
||||
LIST_ELLIPSES +
|
||||
[
|
||||
r'(?<=[0-9])[+\-\*/^](?=[0-9])',
|
||||
r'(?<=[0-9])[+\-\*^](?=[0-9-])',
|
||||
r'(?<=[{al}])\.(?=[{au}])'.format(al=ALPHA_LOWER, au=ALPHA_UPPER),
|
||||
r'(?<=[{a}]),(?=[{a}])'.format(a=ALPHA),
|
||||
r'(?<=[{a}])(?:{h})(?=[{a}])'.format(a=ALPHA, h=HYPHENS),
|
||||
|
|
Loading…
Reference in New Issue
Block a user