mirror of
https://github.com/explosion/spaCy.git
synced 2025-07-12 17:22:25 +03:00
Merge pull request #4217 from adrianeboyd/bugfix/morph-en-serialization
Morphology tag_map-related bugfixes
This commit is contained in:
commit
efcb51ddc8
|
@ -14,8 +14,8 @@ TAG_MAP = {
|
||||||
'""': {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"},
|
'""': {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"},
|
||||||
"''": {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"},
|
"''": {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"},
|
||||||
":": {POS: PUNCT},
|
":": {POS: PUNCT},
|
||||||
"$": {POS: SYM, "Other": {"SymType": "currency"}},
|
"$": {POS: SYM},
|
||||||
"#": {POS: SYM, "Other": {"SymType": "numbersign"}},
|
"#": {POS: SYM},
|
||||||
"AFX": {POS: ADJ, "Hyph": "yes"},
|
"AFX": {POS: ADJ, "Hyph": "yes"},
|
||||||
"CC": {POS: CCONJ, "ConjType": "comp"},
|
"CC": {POS: CCONJ, "ConjType": "comp"},
|
||||||
"CD": {POS: NUM, "NumType": "card"},
|
"CD": {POS: NUM, "NumType": "card"},
|
||||||
|
@ -34,7 +34,7 @@ TAG_MAP = {
|
||||||
"NNP": {POS: PROPN, "NounType": "prop", "Number": "sing"},
|
"NNP": {POS: PROPN, "NounType": "prop", "Number": "sing"},
|
||||||
"NNPS": {POS: PROPN, "NounType": "prop", "Number": "plur"},
|
"NNPS": {POS: PROPN, "NounType": "prop", "Number": "plur"},
|
||||||
"NNS": {POS: NOUN, "Number": "plur"},
|
"NNS": {POS: NOUN, "Number": "plur"},
|
||||||
"PDT": {POS: DET, "AdjType": "pdt", "PronType": "prn"},
|
"PDT": {POS: DET},
|
||||||
"POS": {POS: PART, "Poss": "yes"},
|
"POS": {POS: PART, "Poss": "yes"},
|
||||||
"PRP": {POS: PRON, "PronType": "prs"},
|
"PRP": {POS: PRON, "PronType": "prs"},
|
||||||
"PRP$": {POS: PRON, "PronType": "prs", "Poss": "yes"},
|
"PRP$": {POS: PRON, "PronType": "prs", "Poss": "yes"},
|
||||||
|
@ -58,10 +58,10 @@ TAG_MAP = {
|
||||||
"Number": "sing",
|
"Number": "sing",
|
||||||
"Person": "three",
|
"Person": "three",
|
||||||
},
|
},
|
||||||
"WDT": {POS: PRON, "PronType": "int|rel"},
|
"WDT": {POS: PRON},
|
||||||
"WP": {POS: PRON, "PronType": "int|rel"},
|
"WP": {POS: PRON},
|
||||||
"WP$": {POS: PRON, "Poss": "yes", "PronType": "int|rel"},
|
"WP$": {POS: PRON, "Poss": "yes"},
|
||||||
"WRB": {POS: ADV, "PronType": "int|rel"},
|
"WRB": {POS: ADV},
|
||||||
"ADD": {POS: X},
|
"ADD": {POS: X},
|
||||||
"NFP": {POS: PUNCT},
|
"NFP": {POS: PUNCT},
|
||||||
"GW": {POS: X},
|
"GW": {POS: X},
|
||||||
|
|
|
@ -327,16 +327,11 @@ cdef class Morphology:
|
||||||
self.add_special_case(tag_str, form_str, attrs)
|
self.add_special_case(tag_str, form_str, attrs)
|
||||||
|
|
||||||
def to_bytes(self, exclude=tuple(), **kwargs):
|
def to_bytes(self, exclude=tuple(), **kwargs):
|
||||||
tag_map = {}
|
|
||||||
for key in self.tags:
|
|
||||||
tag_ptr = <MorphAnalysisC*>self.tags.get(key)
|
|
||||||
if tag_ptr != NULL:
|
|
||||||
tag_map[key] = tag_to_json(tag_ptr)
|
|
||||||
exceptions = {}
|
exceptions = {}
|
||||||
for (tag_str, orth_int), attrs in sorted(self.exc.items()):
|
for (tag_str, orth_int), attrs in sorted(self.exc.items()):
|
||||||
exceptions.setdefault(tag_str, {})
|
exceptions.setdefault(tag_str, {})
|
||||||
exceptions[tag_str][self.strings[orth_int]] = attrs
|
exceptions[tag_str][self.strings[orth_int]] = attrs
|
||||||
data = {"tag_map": tag_map, "exceptions": exceptions}
|
data = {"tag_map": self.tag_map, "exceptions": exceptions}
|
||||||
return srsly.msgpack_dumps(data)
|
return srsly.msgpack_dumps(data)
|
||||||
|
|
||||||
def from_bytes(self, byte_string):
|
def from_bytes(self, byte_string):
|
||||||
|
@ -898,7 +893,7 @@ FEATURES = [
|
||||||
"Aspect_mod",
|
"Aspect_mod",
|
||||||
"Aspect_none",
|
"Aspect_none",
|
||||||
"Aspect_perf",
|
"Aspect_perf",
|
||||||
"Aspect_prof",
|
"Aspect_prog",
|
||||||
"Aspect_prosp",
|
"Aspect_prosp",
|
||||||
"Case_abe",
|
"Case_abe",
|
||||||
"Case_abl",
|
"Case_abl",
|
||||||
|
|
Loading…
Reference in New Issue
Block a user