mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 10:16:27 +03:00
Add tokenizer exceptions for -ing verbs
Extend list of tokenizing exceptions introduced in 123810b
This commit is contained in:
parent
288298ead9
commit
c0691b2ab4
|
@@ -387,6 +387,21 @@ for exc_data in [
|
||||||
{ORTH: "O'clock", LEMMA: "o'clock", NORM: "o'clock"},
|
{ORTH: "O'clock", LEMMA: "o'clock", NORM: "o'clock"},
|
||||||
{ORTH: "lovin'", LEMMA: "love", NORM: "loving"},
|
{ORTH: "lovin'", LEMMA: "love", NORM: "loving"},
|
||||||
{ORTH: "Lovin'", LEMMA: "love", NORM: "loving"},
|
{ORTH: "Lovin'", LEMMA: "love", NORM: "loving"},
|
||||||
|
{ORTH: "lovin", LEMMA: "love", NORM: "loving"},
|
||||||
|
{ORTH: "Lovin", LEMMA: "love", NORM: "loving"},
|
||||||
|
{ORTH: "havin'", LEMMA: "have", NORM: "having"},
|
||||||
|
{ORTH: "Havin'", LEMMA: "have", NORM: "having"},
|
||||||
|
{ORTH: "havin", LEMMA: "have", NORM: "having"},
|
||||||
|
{ORTH: "Havin", LEMMA: "have", NORM: "having"},
|
||||||
|
{ORTH: "doin'", LEMMA: "do", NORM: "doing"},
|
||||||
|
{ORTH: "Doin'", LEMMA: "do", NORM: "doing"},
|
||||||
|
{ORTH: "doin", LEMMA: "do", NORM: "doing"},
|
||||||
|
{ORTH: "Doin", LEMMA: "do", NORM: "doing"},
|
||||||
|
{ORTH: "goin'", LEMMA: "go", NORM: "going"},
|
||||||
|
{ORTH: "Goin'", LEMMA: "go", NORM: "going"},
|
||||||
|
{ORTH: "goin", LEMMA: "go", NORM: "going"},
|
||||||
|
{ORTH: "Goin", LEMMA: "go", NORM: "going"},
|
||||||
|
|
||||||
|
|
||||||
{ORTH: "Mt.", LEMMA: "Mount", NORM: "Mount"},
|
{ORTH: "Mt.", LEMMA: "Mount", NORM: "Mount"},
|
||||||
{ORTH: "Ak.", LEMMA: "Alaska", NORM: "Alaska"},
|
{ORTH: "Ak.", LEMMA: "Alaska", NORM: "Alaska"},
|
||||||
|
|
Loading…
Reference in New Issue
Block a user