diff --git a/.gitignore b/.gitignore index 14097dfcd..6afa40f38 100644 --- a/.gitignore +++ b/.gitignore @@ -97,3 +97,6 @@ Desktop.ini # Other *.tgz + +# Pycharm project files +*.idea diff --git a/spacy/lang/en/norm_exceptions.py b/spacy/lang/en/norm_exceptions.py index 49c8ef6ab..402dc3c91 100644 --- a/spacy/lang/en/norm_exceptions.py +++ b/spacy/lang/en/norm_exceptions.py @@ -459,6 +459,8 @@ _exc = { "disorganised": "disorganized", "distil": "distill", "distils": "distills", + "doin": "doing", + "doin'": "doing", "dramatisation": "dramatization", "dramatisations": "dramatizations", "dramatise": "dramatize", @@ -687,6 +689,8 @@ _exc = { "globalises": "globalizes", "globalising": "globalizing", "glueing ": "gluing ", + "goin": "going", + "goin'": "going", "goitre": "goiter", "goitres": "goiters", "gonorrhoea": "gonorrhea", @@ -733,6 +737,8 @@ _exc = { "harmonised": "harmonized", "harmonises": "harmonizes", "harmonising": "harmonizing", + "havin": "having", + "havin'": "having", "homoeopath": "homeopath", "homoeopathic": "homeopathic", "homoeopaths": "homeopaths", @@ -924,6 +930,8 @@ _exc = { "localised": "localized", "localises": "localizes", "localising": "localizing", + "lovin": "loving", + "lovin'": "loving", "louvre": "louver", "louvred": "louvered", "louvres": "louvers ", diff --git a/spacy/lang/en/tokenizer_exceptions.py b/spacy/lang/en/tokenizer_exceptions.py index e870307af..064b7ea59 100644 --- a/spacy/lang/en/tokenizer_exceptions.py +++ b/spacy/lang/en/tokenizer_exceptions.py @@ -387,6 +387,21 @@ for exc_data in [ {ORTH: "O'clock", LEMMA: "o'clock", NORM: "o'clock"}, {ORTH: "lovin'", LEMMA: "love", NORM: "loving"}, {ORTH: "Lovin'", LEMMA: "love", NORM: "loving"}, + {ORTH: "lovin", LEMMA: "love", NORM: "loving"}, + {ORTH: "Lovin", LEMMA: "love", NORM: "loving"}, + {ORTH: "havin'", LEMMA: "have", NORM: "having"}, + {ORTH: "Havin'", LEMMA: "have", NORM: "having"}, + {ORTH: "havin", LEMMA: "have", NORM: "having"}, + {ORTH: "Havin", LEMMA: "have", NORM: 
"having"}, + {ORTH: "doin'", LEMMA: "do", NORM: "doing"}, + {ORTH: "Doin'", LEMMA: "do", NORM: "doing"}, + {ORTH: "doin", LEMMA: "do", NORM: "doing"}, + {ORTH: "Doin", LEMMA: "do", NORM: "doing"}, + {ORTH: "goin'", LEMMA: "go", NORM: "going"}, + {ORTH: "Goin'", LEMMA: "go", NORM: "going"}, + {ORTH: "goin", LEMMA: "go", NORM: "going"}, + {ORTH: "Goin", LEMMA: "go", NORM: "going"}, + {ORTH: "Mt.", LEMMA: "Mount", NORM: "Mount"}, {ORTH: "Ak.", LEMMA: "Alaska", NORM: "Alaska"},