Add tokenizer exceptions for g-dropped "-ing" verbs (e.g. havin', doin', goin')

Extend the list of tokenizer exceptions introduced in 123810b.
This commit is contained in:
Mathias Deschamps 2017-11-13 12:11:16 +01:00
parent 288298ead9
commit c0691b2ab4

View File

@@ -387,6 +387,21 @@ for exc_data in [
{ORTH: "O'clock", LEMMA: "o'clock", NORM: "o'clock"},
{ORTH: "lovin'", LEMMA: "love", NORM: "loving"},
{ORTH: "Lovin'", LEMMA: "love", NORM: "loving"},
{ORTH: "lovin", LEMMA: "love", NORM: "loving"},
{ORTH: "Lovin", LEMMA: "love", NORM: "loving"},
{ORTH: "havin'", LEMMA: "have", NORM: "having"},
{ORTH: "Havin'", LEMMA: "have", NORM: "having"},
{ORTH: "havin", LEMMA: "have", NORM: "having"},
{ORTH: "Havin", LEMMA: "have", NORM: "having"},
{ORTH: "doin'", LEMMA: "do", NORM: "doing"},
{ORTH: "Doin'", LEMMA: "do", NORM: "doing"},
{ORTH: "doin", LEMMA: "do", NORM: "doing"},
{ORTH: "Doin", LEMMA: "do", NORM: "doing"},
{ORTH: "goin'", LEMMA: "go", NORM: "going"},
{ORTH: "Goin'", LEMMA: "go", NORM: "going"},
{ORTH: "goin", LEMMA: "go", NORM: "going"},
{ORTH: "Goin", LEMMA: "go", NORM: "going"},
{ORTH: "Mt.", LEMMA: "Mount", NORM: "Mount"},
{ORTH: "Ak.", LEMMA: "Alaska", NORM: "Alaska"},