mirror of
https://github.com/explosion/spaCy.git
synced 2025-07-10 16:22:29 +03:00
Don't split tokens with digits and "/" infixes (resolves #740)
This commit is contained in:
parent
e9e99a5670
commit
0894b8c0ef
|
@@ -103,7 +103,7 @@ TOKENIZER_SUFFIXES = (
|
||||||
TOKENIZER_INFIXES = (
|
TOKENIZER_INFIXES = (
|
||||||
LIST_ELLIPSES +
|
LIST_ELLIPSES +
|
||||||
[
|
[
|
||||||
r'(?<=[0-9])[+\-\*/^](?=[0-9])',
|
r'(?<=[0-9])[+\-\*^](?=[0-9-])',
|
||||||
r'(?<=[{al}])\.(?=[{au}])'.format(al=ALPHA_LOWER, au=ALPHA_UPPER),
|
r'(?<=[{al}])\.(?=[{au}])'.format(al=ALPHA_LOWER, au=ALPHA_UPPER),
|
||||||
r'(?<=[{a}]),(?=[{a}])'.format(a=ALPHA),
|
r'(?<=[{a}]),(?=[{a}])'.format(a=ALPHA),
|
||||||
r'(?<=[{a}])(?:{h})(?=[{a}])'.format(a=ALPHA, h=HYPHENS),
|
r'(?<=[{a}])(?:{h})(?=[{a}])'.format(a=ALPHA, h=HYPHENS),
|
||||||
|
|
Loading…
Reference in New Issue
Block a user