If no rules are set, lemmatize by lookup

This commit is contained in:
Matthew Honnibal 2017-12-06 12:12:11 +01:00
parent 04a92bd75e
commit 361944e512

View File

@@ -8,15 +8,17 @@ from .symbols import VerbForm_inf, VerbForm_none, Number_sing, Degree_pos
 class Lemmatizer(object):
     @classmethod
     def load(cls, path, index=None, exc=None, rules=None, lookup=None):
-        return cls(index or {}, exc or {}, rules or {}, lookup or {})
+        return cls(index, exc, rules, lookup)
     def __init__(self, index=None, exceptions=None, rules=None, lookup=None):
-        self.index = index if index is not None else {}
-        self.exc = exceptions if exceptions is not None else {}
-        self.rules = rules if rules is not None else {}
+        self.index = index
+        self.exc = exceptions
+        self.rules = rules
         self.lookup_table = lookup if lookup is not None else {}
     def __call__(self, string, univ_pos, morphology=None):
+        if not self.rules:
+            return [self.lookup_table.get(string, string)]
         if univ_pos in (NOUN, 'NOUN', 'noun'):
             univ_pos = 'noun'
         elif univ_pos in (VERB, 'VERB', 'verb'):