mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-13 10:46:29 +03:00
Add lookup lemmatizer to lemmatizer as lookup() method
This commit is contained in:
parent
c1d6d43c83
commit
9fd471372a
|
@ -10,10 +10,11 @@ class Lemmatizer(object):
|
||||||
def load(cls, path, index=None, exc=None, rules=None):
|
def load(cls, path, index=None, exc=None, rules=None):
|
||||||
return cls(index or {}, exc or {}, rules or {})
|
return cls(index or {}, exc or {}, rules or {})
|
||||||
|
|
||||||
def __init__(self, index, exceptions, rules):
|
def __init__(self, index=None, exceptions=None, rules=None, lookup=None):
|
||||||
self.index = index
|
self.index = index if index is not None else {}
|
||||||
self.exc = exceptions
|
self.exc = exceptions if exceptions is not None else {}
|
||||||
self.rules = rules
|
self.rules = rules if rules is not None else {}
|
||||||
|
self.lookup_table = lookup if lookup is not None else {}
|
||||||
|
|
||||||
def __call__(self, string, univ_pos, morphology=None):
|
def __call__(self, string, univ_pos, morphology=None):
|
||||||
if univ_pos == NOUN:
|
if univ_pos == NOUN:
|
||||||
|
@ -79,6 +80,11 @@ class Lemmatizer(object):
|
||||||
def punct(self, string, morphology=None):
|
def punct(self, string, morphology=None):
|
||||||
return self(string, 'punct', morphology)
|
return self(string, 'punct', morphology)
|
||||||
|
|
||||||
|
def lookup(self, string):
    """Return the lemma stored for `string` in the lookup table.

    string: The surface form to look up.
    RETURNS: The entry in `self.lookup_table` for `string`, or `string`
        itself when the table has no entry for it.
    """
    # A single `get` replaces the membership test followed by indexing.
    return self.lookup_table.get(string, string)
|
||||||
|
|
||||||
|
|
||||||
def lemmatize(string, index, exceptions, rules):
|
def lemmatize(string, index, exceptions, rules):
|
||||||
string = string.lower()
|
string = string.lower()
|
||||||
|
@ -102,18 +108,3 @@ def lemmatize(string, index, exceptions, rules):
|
||||||
if not forms:
|
if not forms:
|
||||||
forms.append(string)
|
forms.append(string)
|
||||||
return set(forms)
|
return set(forms)
|
||||||
|
|
||||||
|
|
||||||
class LookupLemmatizer(Lemmatizer):
    """Lemmatizer backed by a plain string-to-lemma lookup table.

    Strings without an entry in the table are returned unchanged.
    """

    @classmethod
    def load(cls, path, lookup):
        """Build a lemmatizer from an in-memory lookup table.

        path: Accepted but not used by this method.
        lookup: Mapping of surface form to lemma; a falsy value gives an
            empty table.
        RETURNS: A new instance of `cls`.
        """
        return cls(lookup or {})

    def __init__(self, lookup):
        """Store the lookup table.

        lookup: Mapping of surface form to lemma. `None` is treated as an
            empty table, so every string falls back to itself.
        """
        self.lookup = lookup if lookup is not None else {}

    def __call__(self, string, univ_pos, morphology=None):
        """Return a one-element set holding the lemma for `string`.

        string: The surface form to look up.
        univ_pos: Accepted but not used by this method.
        morphology: Accepted but not used by this method.
        RETURNS: A set containing the table entry for `string`, or a set
            containing `string` itself when no usable entry exists.
        """
        try:
            return {self.lookup[string]}
        except (KeyError, TypeError):
            # KeyError: the table has no entry for this string.
            # TypeError: the table is not subscriptable, or the stored lemma
            # is unhashable. Both keep the fall-back-to-input behaviour
            # without swallowing KeyboardInterrupt or SystemExit the way a
            # bare `except:` does.
            return {string}
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user