From c0691b2ab4a95d88c760a389c64fa76605cb2a59 Mon Sep 17 00:00:00 2001 From: Mathias Deschamps Date: Mon, 13 Nov 2017 12:11:16 +0100 Subject: [PATCH] Add tokenizer exceptions for ing verbs Extend list of tokenizing exceptions introduced in 123810b --- spacy/lang/en/tokenizer_exceptions.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/spacy/lang/en/tokenizer_exceptions.py b/spacy/lang/en/tokenizer_exceptions.py index e870307af..064b7ea59 100644 --- a/spacy/lang/en/tokenizer_exceptions.py +++ b/spacy/lang/en/tokenizer_exceptions.py @@ -387,6 +387,21 @@ for exc_data in [ {ORTH: "O'clock", LEMMA: "o'clock", NORM: "o'clock"}, {ORTH: "lovin'", LEMMA: "love", NORM: "loving"}, {ORTH: "Lovin'", LEMMA: "love", NORM: "loving"}, + {ORTH: "lovin", LEMMA: "love", NORM: "loving"}, + {ORTH: "Lovin", LEMMA: "love", NORM: "loving"}, + {ORTH: "havin'", LEMMA: "have", NORM: "having"}, + {ORTH: "Havin'", LEMMA: "have", NORM: "having"}, + {ORTH: "havin", LEMMA: "have", NORM: "having"}, + {ORTH: "Havin", LEMMA: "have", NORM: "having"}, + {ORTH: "doin'", LEMMA: "do", NORM: "doing"}, + {ORTH: "Doin'", LEMMA: "do", NORM: "doing"}, + {ORTH: "doin", LEMMA: "do", NORM: "doing"}, + {ORTH: "Doin", LEMMA: "do", NORM: "doing"}, + {ORTH: "goin'", LEMMA: "go", NORM: "going"}, + {ORTH: "Goin'", LEMMA: "go", NORM: "going"}, + {ORTH: "goin", LEMMA: "go", NORM: "going"}, + {ORTH: "Goin", LEMMA: "go", NORM: "going"}, + {ORTH: "Mt.", LEMMA: "Mount", NORM: "Mount"}, {ORTH: "Ak.", LEMMA: "Alaska", NORM: "Alaska"},