Merge pull request #4217 from adrianeboyd/bugfix/morph-en-serialization

Morphology tag_map-related bugfixes
This commit is contained in:
Matthew Honnibal 2019-08-30 12:46:29 +02:00 committed by GitHub
commit efcb51ddc8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 9 additions and 14 deletions

View File

@@ -14,8 +14,8 @@ TAG_MAP = {
'""': {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"},
"''": {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"},
":": {POS: PUNCT},
"$": {POS: SYM, "Other": {"SymType": "currency"}},
"#": {POS: SYM, "Other": {"SymType": "numbersign"}},
"$": {POS: SYM},
"#": {POS: SYM},
"AFX": {POS: ADJ, "Hyph": "yes"},
"CC": {POS: CCONJ, "ConjType": "comp"},
"CD": {POS: NUM, "NumType": "card"},
@@ -34,7 +34,7 @@ TAG_MAP = {
"NNP": {POS: PROPN, "NounType": "prop", "Number": "sing"},
"NNPS": {POS: PROPN, "NounType": "prop", "Number": "plur"},
"NNS": {POS: NOUN, "Number": "plur"},
"PDT": {POS: DET, "AdjType": "pdt", "PronType": "prn"},
"PDT": {POS: DET},
"POS": {POS: PART, "Poss": "yes"},
"PRP": {POS: PRON, "PronType": "prs"},
"PRP$": {POS: PRON, "PronType": "prs", "Poss": "yes"},
@@ -58,10 +58,10 @@ TAG_MAP = {
"Number": "sing",
"Person": "three",
},
"WDT": {POS: PRON, "PronType": "int|rel"},
"WP": {POS: PRON, "PronType": "int|rel"},
"WP$": {POS: PRON, "Poss": "yes", "PronType": "int|rel"},
"WRB": {POS: ADV, "PronType": "int|rel"},
"WDT": {POS: PRON},
"WP": {POS: PRON},
"WP$": {POS: PRON, "Poss": "yes"},
"WRB": {POS: ADV},
"ADD": {POS: X},
"NFP": {POS: PUNCT},
"GW": {POS: X},

View File

@@ -327,16 +327,11 @@ cdef class Morphology:
self.add_special_case(tag_str, form_str, attrs)
def to_bytes(self, exclude=tuple(), **kwargs):
tag_map = {}
for key in self.tags:
tag_ptr = <MorphAnalysisC*>self.tags.get(key)
if tag_ptr != NULL:
tag_map[key] = tag_to_json(tag_ptr)
exceptions = {}
for (tag_str, orth_int), attrs in sorted(self.exc.items()):
exceptions.setdefault(tag_str, {})
exceptions[tag_str][self.strings[orth_int]] = attrs
data = {"tag_map": tag_map, "exceptions": exceptions}
data = {"tag_map": self.tag_map, "exceptions": exceptions}
return srsly.msgpack_dumps(data)
def from_bytes(self, byte_string):
@@ -898,7 +893,7 @@ FEATURES = [
"Aspect_mod",
"Aspect_none",
"Aspect_perf",
"Aspect_prof",
"Aspect_prog",
"Aspect_prosp",
"Case_abe",
"Case_abl",