mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-15 22:27:12 +03:00
13 lines
268 B
Python
13 lines
268 B
Python
from ..char_classes import LIST_QUOTES
|
|
from ..punctuation import TOKENIZER_INFIXES as BASE_TOKENIZER_INFIXES
|
|
|
|
|
|
# Infix patterns defined by this module: the local additions are listed
# first, then the shared quote characters, then the base infix rules.
_infixes = [
    "·",
    "ㆍ",
    r"\(",
    r"\)",
    # A tilde that is preceded by a digit and followed by a digit or hyphen.
    r"(?<=[0-9])~(?=[0-9-])",
]
_infixes = _infixes + LIST_QUOTES + BASE_TOKENIZER_INFIXES

# Public name under which the combined pattern list is exported.
TOKENIZER_INFIXES = _infixes
|