mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-11 04:08:09 +03:00
Add exceptions to tokenizer and norm
This commit is contained in:
commit
ea6c85c67a
3
.gitignore
vendored
3
.gitignore
vendored
|
@@ -97,3 +97,6 @@ Desktop.ini
|
|||
|
||||
# Other
|
||||
*.tgz
|
||||
|
||||
# Pycharm project files
|
||||
*.idea
|
||||
|
|
|
@@ -459,6 +459,8 @@ _exc = {
|
|||
"disorganised": "disorganized",
|
||||
"distil": "distill",
|
||||
"distils": "distills",
|
||||
"doin": "doing",
|
||||
"doin'": "doing",
|
||||
"dramatisation": "dramatization",
|
||||
"dramatisations": "dramatizations",
|
||||
"dramatise": "dramatize",
|
||||
|
@@ -687,6 +689,8 @@ _exc = {
|
|||
"globalises": "globalizes",
|
||||
"globalising": "globalizing",
|
||||
"glueing ": "gluing ",
|
||||
"goin": "going",
|
||||
"goin'":"going",
|
||||
"goitre": "goiter",
|
||||
"goitres": "goiters",
|
||||
"gonorrhoea": "gonorrhea",
|
||||
|
@@ -733,6 +737,8 @@ _exc = {
|
|||
"harmonised": "harmonized",
|
||||
"harmonises": "harmonizes",
|
||||
"harmonising": "harmonizing",
|
||||
"havin": "having",
|
||||
"havin'": "having",
|
||||
"homoeopath": "homeopath",
|
||||
"homoeopathic": "homeopathic",
|
||||
"homoeopaths": "homeopaths",
|
||||
|
@@ -924,6 +930,8 @@ _exc = {
|
|||
"localised": "localized",
|
||||
"localises": "localizes",
|
||||
"localising": "localizing",
|
||||
"lovin": "loving",
|
||||
"lovin'": "loving",
|
||||
"louvre": "louver",
|
||||
"louvred": "louvered",
|
||||
"louvres": "louvers ",
|
||||
|
|
|
@@ -387,6 +387,21 @@ for exc_data in [
|
|||
{ORTH: "O'clock", LEMMA: "o'clock", NORM: "o'clock"},
|
||||
{ORTH: "lovin'", LEMMA: "love", NORM: "loving"},
|
||||
{ORTH: "Lovin'", LEMMA: "love", NORM: "loving"},
|
||||
{ORTH: "lovin", LEMMA: "love", NORM: "loving"},
|
||||
{ORTH: "Lovin", LEMMA: "love", NORM: "loving"},
|
||||
{ORTH: "havin'", LEMMA: "have", NORM: "having"},
|
||||
{ORTH: "Havin'", LEMMA: "have", NORM: "having"},
|
||||
{ORTH: "havin", LEMMA: "have", NORM: "having"},
|
||||
{ORTH: "Havin", LEMMA: "have", NORM: "having"},
|
||||
{ORTH: "doin'", LEMMA: "do", NORM: "doing"},
|
||||
{ORTH: "Doin'", LEMMA: "do", NORM: "doing"},
|
||||
{ORTH: "doin", LEMMA: "do", NORM: "doing"},
|
||||
{ORTH: "Doin", LEMMA: "do", NORM: "doing"},
|
||||
{ORTH: "goin'", LEMMA: "go", NORM: "going"},
|
||||
{ORTH: "Goin'", LEMMA: "go", NORM: "going"},
|
||||
{ORTH: "goin", LEMMA: "go", NORM: "going"},
|
||||
{ORTH: "Goin", LEMMA: "go", NORM: "going"},
|
||||
|
||||
|
||||
{ORTH: "Mt.", LEMMA: "Mount", NORM: "Mount"},
|
||||
{ORTH: "Ak.", LEMMA: "Alaska", NORM: "Alaska"},
|
||||
|
|
Loading…
Reference in New Issue
Block a user