# encoding: utf8
#
# Provenance: this module was introduced as spacy/en/lemma_rules.py by commit
# 2eb163c5dd675c2e7a9cedb5d6868545833cbf34 ("Add lemma rules", Ines Montani,
# Sun, 18 Dec 2016 15:36:20 +0100).
from __future__ import unicode_literals


# Lemma rewrite rules, grouped under the keys "noun", "verb", "adj" and
# "punct". Every rule is a two-element list of strings.
#
# NOTE(review): each rule is presumably [ending_to_match, replacement], to be
# applied by a lemmatizer defined elsewhere -- the consumer is not visible
# from this module, so confirm the exact matching semantics there.
#
# The order of the rules inside each list is kept exactly as committed.
# Several endings appear twice with different replacements (e.g. "es" -> "e"
# and "es" -> ""), so a consumer may depend on which one it sees first; do
# not sort or de-duplicate these lists without checking the caller.
LEMMA_RULES = {
    "noun": [
        ["s", ""],
        ["ses", "s"],
        ["ves", "f"],
        ["xes", "x"],
        ["zes", "z"],
        ["ches", "ch"],
        ["shes", "sh"],
        ["men", "man"],
        ["ies", "y"],
    ],

    "verb": [
        ["s", ""],
        ["ies", "y"],
        ["es", "e"],
        ["es", ""],
        ["ed", "e"],
        ["ed", ""],
        ["ing", "e"],
        ["ing", ""],
    ],

    "adj": [
        ["er", ""],
        ["est", ""],
        ["er", "e"],
        ["est", "e"],
    ],

    # Typographic quotation marks paired with their ASCII counterparts. All
    # four are spelled as \uXXXX escapes so the source stays pure ASCII and
    # the entries cannot be corrupted by a wrong file encoding.
    "punct": [
        ["\u201c", "\""],  # LEFT DOUBLE QUOTATION MARK
        ["\u201d", "\""],  # RIGHT DOUBLE QUOTATION MARK
        ["\u2018", "'"],   # LEFT SINGLE QUOTATION MARK
        ["\u2019", "'"],   # RIGHT SINGLE QUOTATION MARK
    ],
}