Return lowercase form as default except for PROPN

This commit is contained in:
Adriane Boyd 2020-05-20 15:35:08 +02:00
parent 4fa9670537
commit 8cba0e41d8

View File

@@ -56,6 +56,11 @@ class Lemmatizer(object):
index_table = self.lookups.get_table("lemma_index", {})
exc_table = self.lookups.get_table("lemma_exc", {})
rules_table = self.lookups.get_table("lemma_rules", {})
if not any((index_table.get(univ_pos), exc_table.get(univ_pos), rules_table.get(univ_pos))):
if univ_pos == "propn":
return [string]
else:
return [string.lower()]
lemmas = self.lemmatize(
string,
index_table.get(univ_pos, {}),