Update punctuation.py

Add the mathematical left and right angle brackets as punctuation for Ancient Greek to improve tokenization.
This commit is contained in:
Jacobo Myerston 2023-07-15 15:43:03 -07:00
parent 8933b37021
commit e8c23a04f3

View File

@ -6,6 +6,7 @@ _prefixes = (
[ [
"", "",
"", "",
"",
] ]
+ LIST_PUNCT + LIST_PUNCT
+ LIST_ELLIPSES + LIST_ELLIPSES
@ -22,6 +23,7 @@ _suffixes = (
+ [ + [
"", "",
"", "",
"",
r"(?<=[\u1F00-\u1FFF\u0370-\u03FF])[\-\.⸏]", r"(?<=[\u1F00-\u1FFF\u0370-\u03FF])[\-\.⸏]",
] ]
) )