From ac8116510d3463995e0788b032b3aecb62acea22 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=B8ren=20Lind=20Kristiansen?= <soren@gutsandglory.dk>
Date: Fri, 24 Nov 2017 11:16:53 +0100
Subject: [PATCH] Fix tokenization of 'i.' for Danish.

---
 spacy/lang/da/tokenizer_exceptions.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/spacy/lang/da/tokenizer_exceptions.py b/spacy/lang/da/tokenizer_exceptions.py
index c67c038bf..303d41158 100644
--- a/spacy/lang/da/tokenizer_exceptions.py
+++ b/spacy/lang/da/tokenizer_exceptions.py
@@ -1,7 +1,7 @@
 # encoding: utf8
 from __future__ import unicode_literals
 
-from ...symbols import ORTH, LEMMA, NORM
+from ...symbols import ORTH, LEMMA, NORM, TAG, ADP, PUNCT
 
 
 _exc = {}
@@ -28,5 +28,12 @@ for orth in [
     "t.o.m.", "vha.", ""]:
     _exc[orth] = [{ORTH: orth}]
 
+_custom_base_exc = {
+    "i.": [
+        {ORTH: "i", LEMMA: "i", NORM: "i"},
+        {ORTH: ".", TAG: PUNCT}]
+}
+_exc.update(_custom_base_exc)
+
 
 TOKENIZER_EXCEPTIONS = _exc