mirror of
https://github.com/explosion/spaCy.git
synced 2025-02-03 13:14:11 +03:00
Return lowercase form as default except for PROPN
This commit is contained in:
parent
4fa9670537
commit
8cba0e41d8
|
@@ -56,6 +56,11 @@ class Lemmatizer(object):
|
||||||
index_table = self.lookups.get_table("lemma_index", {})
|
index_table = self.lookups.get_table("lemma_index", {})
|
||||||
exc_table = self.lookups.get_table("lemma_exc", {})
|
exc_table = self.lookups.get_table("lemma_exc", {})
|
||||||
rules_table = self.lookups.get_table("lemma_rules", {})
|
rules_table = self.lookups.get_table("lemma_rules", {})
|
||||||
|
if not any((index_table.get(univ_pos), exc_table.get(univ_pos), rules_table.get(univ_pos))):
|
||||||
|
if univ_pos == "propn":
|
||||||
|
return [string]
|
||||||
|
else:
|
||||||
|
return [string.lower()]
|
||||||
lemmas = self.lemmatize(
|
lemmas = self.lemmatize(
|
||||||
string,
|
string,
|
||||||
index_table.get(univ_pos, {}),
|
index_table.get(univ_pos, {}),
|
||||||
|
|
Loading…
Reference in New Issue
Block a user