From aad23ab0b404aefdfc762a1573c7474d34ec7c20 Mon Sep 17 00:00:00 2001 From: Magnus Burton Date: Tue, 10 Jan 2017 16:07:20 +0100 Subject: [PATCH] Supplemented with capitalized Swedish exceptions --- spacy/sv/tokenizer_exceptions.py | 57 ++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/spacy/sv/tokenizer_exceptions.py b/spacy/sv/tokenizer_exceptions.py index d8d4e8823..43732612f 100644 --- a/spacy/sv/tokenizer_exceptions.py +++ b/spacy/sv/tokenizer_exceptions.py @@ -63,6 +63,63 @@ TOKENIZER_EXCEPTIONS = { "sön.": [ {ORTH: "sön.", LEMMA: "söndag"} ], + "Jan.": [ + {ORTH: "Jan.", LEMMA: "Januari"} + ], + "Febr.": [ + {ORTH: "Febr.", LEMMA: "Februari"} + ], + "Feb.": [ + {ORTH: "Feb.", LEMMA: "Februari"} + ], + "Apr.": [ + {ORTH: "Apr.", LEMMA: "April"} + ], + "Jun.": [ + {ORTH: "Jun.", LEMMA: "Juni"} + ], + "Jul.": [ + {ORTH: "Jul.", LEMMA: "Juli"} + ], + "Aug.": [ + {ORTH: "Aug.", LEMMA: "Augusti"} + ], + "Sept.": [ + {ORTH: "Sept.", LEMMA: "September"} + ], + "Sep.": [ + {ORTH: "Sep.", LEMMA: "September"} + ], + "Okt.": [ + {ORTH: "Okt.", LEMMA: "Oktober"} + ], + "Nov.": [ + {ORTH: "Nov.", LEMMA: "November"} + ], + "Dec.": [ + {ORTH: "Dec.", LEMMA: "December"} + ], + "Mån.": [ + {ORTH: "Mån.", LEMMA: "Måndag"} + ], + "Tis.": [ + {ORTH: "Tis.", LEMMA: "Tisdag"} + ], + "Ons.": [ + {ORTH: "Ons.", LEMMA: "Onsdag"} + ], + "Tors.": [ + {ORTH: "Tors.", LEMMA: "Torsdag"} + ], + "Fre.": [ + {ORTH: "Fre.", LEMMA: "Fredag"} + ], + "Lör.": [ + {ORTH: "Lör.", LEMMA: "Lördag"} + ], + "Sön.": [ + {ORTH: "Sön.", LEMMA: "Söndag"} + ], "sthlm": [ {ORTH: "sthlm", LEMMA: "Stockholm"} ],