mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-14 21:57:15 +03:00
12 lines
267 B
Python
12 lines
267 B
Python
from ..char_classes import LIST_QUOTES
|
|
from ..punctuation import TOKENIZER_INFIXES as BASE_TOKENIZER_INFIXES
|
|
|
|
# Infix patterns defined by this module, followed by the shared quote
# characters and the base infix patterns imported above. Order is preserved:
# local patterns first, then LIST_QUOTES, then BASE_TOKENIZER_INFIXES.
_infixes = [
    "·",  # U+00B7 MIDDLE DOT
    "ㆍ",  # U+318D HANGUL LETTER ARAEA
    r"\(",  # literal opening parenthesis
    r"\)",  # literal closing parenthesis
    # A tilde preceded by a digit and followed by a digit or a hyphen.
    r"(?<=[0-9])~(?=[0-9-])",
] + LIST_QUOTES + BASE_TOKENIZER_INFIXES

# Public name for the combined list (presumably what the tokenizer setup
# elsewhere in the package imports; confirm against callers).
TOKENIZER_INFIXES = _infixes
|