Merge pull request #4217 from adrianeboyd/bugfix/morph-en-serialization

Morphology tag_map-related bugfixes
This commit is contained in:
Matthew Honnibal 2019-08-30 12:46:29 +02:00 committed by GitHub
commit efcb51ddc8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 9 additions and 14 deletions

View File

@@ -14,8 +14,8 @@ TAG_MAP = {
'""': {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"}, '""': {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"},
"''": {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"}, "''": {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"},
":": {POS: PUNCT}, ":": {POS: PUNCT},
"$": {POS: SYM, "Other": {"SymType": "currency"}}, "$": {POS: SYM},
"#": {POS: SYM, "Other": {"SymType": "numbersign"}}, "#": {POS: SYM},
"AFX": {POS: ADJ, "Hyph": "yes"}, "AFX": {POS: ADJ, "Hyph": "yes"},
"CC": {POS: CCONJ, "ConjType": "comp"}, "CC": {POS: CCONJ, "ConjType": "comp"},
"CD": {POS: NUM, "NumType": "card"}, "CD": {POS: NUM, "NumType": "card"},
@@ -34,7 +34,7 @@ TAG_MAP = {
"NNP": {POS: PROPN, "NounType": "prop", "Number": "sing"}, "NNP": {POS: PROPN, "NounType": "prop", "Number": "sing"},
"NNPS": {POS: PROPN, "NounType": "prop", "Number": "plur"}, "NNPS": {POS: PROPN, "NounType": "prop", "Number": "plur"},
"NNS": {POS: NOUN, "Number": "plur"}, "NNS": {POS: NOUN, "Number": "plur"},
"PDT": {POS: DET, "AdjType": "pdt", "PronType": "prn"}, "PDT": {POS: DET},
"POS": {POS: PART, "Poss": "yes"}, "POS": {POS: PART, "Poss": "yes"},
"PRP": {POS: PRON, "PronType": "prs"}, "PRP": {POS: PRON, "PronType": "prs"},
"PRP$": {POS: PRON, "PronType": "prs", "Poss": "yes"}, "PRP$": {POS: PRON, "PronType": "prs", "Poss": "yes"},
@@ -58,10 +58,10 @@ TAG_MAP = {
"Number": "sing", "Number": "sing",
"Person": "three", "Person": "three",
}, },
"WDT": {POS: PRON, "PronType": "int|rel"}, "WDT": {POS: PRON},
"WP": {POS: PRON, "PronType": "int|rel"}, "WP": {POS: PRON},
"WP$": {POS: PRON, "Poss": "yes", "PronType": "int|rel"}, "WP$": {POS: PRON, "Poss": "yes"},
"WRB": {POS: ADV, "PronType": "int|rel"}, "WRB": {POS: ADV},
"ADD": {POS: X}, "ADD": {POS: X},
"NFP": {POS: PUNCT}, "NFP": {POS: PUNCT},
"GW": {POS: X}, "GW": {POS: X},

View File

@@ -327,16 +327,11 @@ cdef class Morphology:
self.add_special_case(tag_str, form_str, attrs) self.add_special_case(tag_str, form_str, attrs)
def to_bytes(self, exclude=tuple(), **kwargs): def to_bytes(self, exclude=tuple(), **kwargs):
tag_map = {}
for key in self.tags:
tag_ptr = <MorphAnalysisC*>self.tags.get(key)
if tag_ptr != NULL:
tag_map[key] = tag_to_json(tag_ptr)
exceptions = {} exceptions = {}
for (tag_str, orth_int), attrs in sorted(self.exc.items()): for (tag_str, orth_int), attrs in sorted(self.exc.items()):
exceptions.setdefault(tag_str, {}) exceptions.setdefault(tag_str, {})
exceptions[tag_str][self.strings[orth_int]] = attrs exceptions[tag_str][self.strings[orth_int]] = attrs
data = {"tag_map": tag_map, "exceptions": exceptions} data = {"tag_map": self.tag_map, "exceptions": exceptions}
return srsly.msgpack_dumps(data) return srsly.msgpack_dumps(data)
def from_bytes(self, byte_string): def from_bytes(self, byte_string):
@@ -898,7 +893,7 @@ FEATURES = [
"Aspect_mod", "Aspect_mod",
"Aspect_none", "Aspect_none",
"Aspect_perf", "Aspect_perf",
"Aspect_prof", "Aspect_prog",
"Aspect_prosp", "Aspect_prosp",
"Case_abe", "Case_abe",
"Case_abl", "Case_abl",