Update punctuation.py

Add the mathematical left and right angle brackets (⟨ U+27E8 and ⟩ U+27E9) as punctuation for Ancient Greek to improve tokenization.
This commit is contained in:
Jacobo Myerston 2023-07-15 15:43:03 -07:00
parent 8933b37021
commit e8c23a04f3

View File

@@ -6,6 +6,7 @@ _prefixes = (
[
"",
"",
"⟨",
]
+ LIST_PUNCT
+ LIST_ELLIPSES
@@ -22,6 +23,7 @@ _suffixes = (
+ [
"",
"",
"⟩",
r"(?<=[\u1F00-\u1FFF\u0370-\u03FF])[\-\.⸏]",
]
)