mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-25 09:26:27 +03:00
Fix passing of morphological features to lemmatizer
This commit is contained in:
parent
74db1d9602
commit
b69013e2d7
|
@ -47,17 +47,6 @@ class Lemmatizer(object):
|
||||||
Check whether we're dealing with an uninflected paradigm, so we can
|
Check whether we're dealing with an uninflected paradigm, so we can
|
||||||
avoid lemmatization entirely.
|
avoid lemmatization entirely.
|
||||||
"""
|
"""
|
||||||
morphology = {} if morphology is None else morphology
|
|
||||||
morphology = dict(morphology)
|
|
||||||
for key, value in list(morphology.items()):
|
|
||||||
if value is True:
|
|
||||||
feat, value = key.split('_')
|
|
||||||
morphology[feat] = value
|
|
||||||
others = [
|
|
||||||
key
|
|
||||||
for key in morphology
|
|
||||||
if key not in (POS, "Number", "POS", "VerbForm", "Tense")
|
|
||||||
]
|
|
||||||
if univ_pos == "noun" and morphology.get("Number") == "sing":
|
if univ_pos == "noun" and morphology.get("Number") == "sing":
|
||||||
return True
|
return True
|
||||||
elif univ_pos == "verb" and morphology.get("VerbForm") == "inf":
|
elif univ_pos == "verb" and morphology.get("VerbForm") == "inf":
|
||||||
|
@ -68,7 +57,6 @@ class Lemmatizer(object):
|
||||||
morphology.get("VerbForm") == "fin"
|
morphology.get("VerbForm") == "fin"
|
||||||
and morphology.get("Tense") == "pres"
|
and morphology.get("Tense") == "pres"
|
||||||
and morphology.get("Number") is None
|
and morphology.get("Number") is None
|
||||||
and not others
|
|
||||||
):
|
):
|
||||||
return True
|
return True
|
||||||
elif univ_pos == "adj" and morphology.get("Degree") == "pos":
|
elif univ_pos == "adj" and morphology.get("Degree") == "pos":
|
||||||
|
|
|
@ -141,7 +141,16 @@ cdef class Morphology:
|
||||||
return self.strings.add(py_string.lower())
|
return self.strings.add(py_string.lower())
|
||||||
cdef list lemma_strings
|
cdef list lemma_strings
|
||||||
cdef unicode lemma_string
|
cdef unicode lemma_string
|
||||||
lemma_strings = self.lemmatizer(py_string, univ_pos, morphology)
|
# Normalize features into a dict keyed by the field, to make life easier
|
||||||
|
# for the lemmatizer. Also converts keys and values to strings (via self.strings.as_string).
|
||||||
|
string_feats = {}
|
||||||
|
for key, value in morphology.items():
|
||||||
|
if value is True:
|
||||||
|
name, value = self.strings.as_string(key).split('_', 1)
|
||||||
|
string_feats[name] = value
|
||||||
|
else:
|
||||||
|
string_feats[self.strings.as_string(key)] = self.strings.as_string(value)
|
||||||
|
lemma_strings = self.lemmatizer(py_string, univ_pos, string_feats)
|
||||||
lemma_string = lemma_strings[0]
|
lemma_string = lemma_strings[0]
|
||||||
lemma = self.strings.add(lemma_string)
|
lemma = self.strings.add(lemma_string)
|
||||||
return lemma
|
return lemma
|
||||||
|
|
Loading…
Reference in New Issue
Block a user