From 4535fc65b498b273d58c33e953d5c37e133e9c5f Mon Sep 17 00:00:00 2001 From: Lise Brinck Date: Wed, 22 Feb 2023 15:09:05 +0100 Subject: [PATCH] Allow ':' as an infix if the succeeding char is uppercase --- spacy/lang/sv/punctuation.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/spacy/lang/sv/punctuation.py b/spacy/lang/sv/punctuation.py index 4476ed528..67f1bcdc4 100644 --- a/spacy/lang/sv/punctuation.py +++ b/spacy/lang/sv/punctuation.py @@ -12,10 +12,12 @@ _infixes = ( r"(?<=[{al}])\.(?=[{au}])".format(al=ALPHA_LOWER, au=ALPHA_UPPER), r"(?<=[{a}])[,!?](?=[{a}])".format(a=ALPHA), r"(?<=[{a}])[<>=](?=[{a}])".format(a=ALPHA), + r"(?<=[{a}]):(?=[{a}])".format(a=ALPHA_UPPER), r"(?<=[{a}]),(?=[{a}])".format(a=ALPHA), r"(?<=[{a}])([{q}\)\]\(\[])(?=[{a}])".format(a=ALPHA, q=_quotes), r"(?<=[{a}])--(?=[{a}])".format(a=ALPHA), r"(?<=[{a}0-9])[<>=/](?=[{a}])".format(a=ALPHA), + r"(?<=[{a}0-9]):(?=[{a}])".format(a=ALPHA_UPPER), ] )