From 9f740a9891d6c118eeb154dd819dba58d93db8ac Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Wed, 26 Feb 2020 14:59:03 +0100 Subject: [PATCH] Add a few more Danish tokenizer exceptions --- spacy/lang/da/tokenizer_exceptions.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/spacy/lang/da/tokenizer_exceptions.py b/spacy/lang/da/tokenizer_exceptions.py index d669fb981..89b083186 100644 --- a/spacy/lang/da/tokenizer_exceptions.py +++ b/spacy/lang/da/tokenizer_exceptions.py @@ -70,6 +70,7 @@ for orth in [ "A/S", "B.C.", "BK.", + "B.T.", "Dr.", "Boul.", "Chr.", @@ -79,6 +80,7 @@ for orth in [ "Hf.", "i/s", "I/S", + "Inc.", "Kprs.", "L.A.", "Ll.", @@ -149,6 +151,7 @@ for orth in [ "bygn.", "c/o", "ca.", + "cm.", "cand.", "d.d.", "d.m.", @@ -172,10 +175,12 @@ for orth in [ "dl.", "do.", "dobb.", + "dr.", "dr.h.c", "dr.phil.", "ds.", "dvs.", + "d.v.s.", "e.b.", "e.l.", "e.o.", @@ -297,10 +302,14 @@ for orth in [ "kap.", "kbh.", "kem.", + "kg.", + "kgs.", "kgl.", "kl.", "kld.", + "km.", "km/t", + "km/t.", "knsp.", "komm.", "kons.", @@ -311,6 +320,7 @@ for orth in [ "kt.", "ktr.", "kv.", + "kvm.", "kvt.", "l.c.", "lab.", @@ -357,6 +367,7 @@ for orth in [ "nto.", "nuv.", "o/m", + "o/m.", "o.a.", "o.fl.", "o.h.", @@ -526,6 +537,7 @@ for orth in [ "vejl.", "vh.", "vha.", + "vind.", "vs.", "vsa.", "vær.",