From e8c23a04f32d855b20face4fc4f669155ccec002 Mon Sep 17 00:00:00 2001
From: Jacobo Myerston <43222279+jmyerston@users.noreply.github.com>
Date: Sat, 15 Jul 2023 15:43:03 -0700
Subject: [PATCH] Update punctuation.py

Add mathematical left and right angle brackets as punctuation for ancient Greek for better tokenization.
---
 spacy/lang/grc/punctuation.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/spacy/lang/grc/punctuation.py b/spacy/lang/grc/punctuation.py
index 8f3589e9a..f48f7cfbe 100644
--- a/spacy/lang/grc/punctuation.py
+++ b/spacy/lang/grc/punctuation.py
@@ -6,6 +6,7 @@ _prefixes = (
     [
         "†",
         "⸏",
+        "〈",
     ]
     + LIST_PUNCT
     + LIST_ELLIPSES
@@ -22,6 +23,7 @@ _suffixes = (
     + [
         "†",
         "⸎",
+        "〉",
         r"(?<=[\u1F00-\u1FFF\u0370-\u03FF])[\-\.⸏]",
     ]
 )