mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-27 01:34:30 +03:00
Merge pull request #4217 from adrianeboyd/bugfix/morph-en-serialization
Morphology tag_map-related bugfixes
This commit is contained in:
commit
efcb51ddc8
|
@@ -14,8 +14,8 @@ TAG_MAP = {
|
|||
'""': {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"},
|
||||
"''": {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"},
|
||||
":": {POS: PUNCT},
|
||||
"$": {POS: SYM, "Other": {"SymType": "currency"}},
|
||||
"#": {POS: SYM, "Other": {"SymType": "numbersign"}},
|
||||
"$": {POS: SYM},
|
||||
"#": {POS: SYM},
|
||||
"AFX": {POS: ADJ, "Hyph": "yes"},
|
||||
"CC": {POS: CCONJ, "ConjType": "comp"},
|
||||
"CD": {POS: NUM, "NumType": "card"},
|
||||
|
@@ -34,7 +34,7 @@ TAG_MAP = {
|
|||
"NNP": {POS: PROPN, "NounType": "prop", "Number": "sing"},
|
||||
"NNPS": {POS: PROPN, "NounType": "prop", "Number": "plur"},
|
||||
"NNS": {POS: NOUN, "Number": "plur"},
|
||||
"PDT": {POS: DET, "AdjType": "pdt", "PronType": "prn"},
|
||||
"PDT": {POS: DET},
|
||||
"POS": {POS: PART, "Poss": "yes"},
|
||||
"PRP": {POS: PRON, "PronType": "prs"},
|
||||
"PRP$": {POS: PRON, "PronType": "prs", "Poss": "yes"},
|
||||
|
@@ -58,10 +58,10 @@ TAG_MAP = {
|
|||
"Number": "sing",
|
||||
"Person": "three",
|
||||
},
|
||||
"WDT": {POS: PRON, "PronType": "int|rel"},
|
||||
"WP": {POS: PRON, "PronType": "int|rel"},
|
||||
"WP$": {POS: PRON, "Poss": "yes", "PronType": "int|rel"},
|
||||
"WRB": {POS: ADV, "PronType": "int|rel"},
|
||||
"WDT": {POS: PRON},
|
||||
"WP": {POS: PRON},
|
||||
"WP$": {POS: PRON, "Poss": "yes"},
|
||||
"WRB": {POS: ADV},
|
||||
"ADD": {POS: X},
|
||||
"NFP": {POS: PUNCT},
|
||||
"GW": {POS: X},
|
||||
|
|
|
@@ -327,16 +327,11 @@ cdef class Morphology:
|
|||
self.add_special_case(tag_str, form_str, attrs)
|
||||
|
||||
def to_bytes(self, exclude=tuple(), **kwargs):
|
||||
tag_map = {}
|
||||
for key in self.tags:
|
||||
tag_ptr = <MorphAnalysisC*>self.tags.get(key)
|
||||
if tag_ptr != NULL:
|
||||
tag_map[key] = tag_to_json(tag_ptr)
|
||||
exceptions = {}
|
||||
for (tag_str, orth_int), attrs in sorted(self.exc.items()):
|
||||
exceptions.setdefault(tag_str, {})
|
||||
exceptions[tag_str][self.strings[orth_int]] = attrs
|
||||
data = {"tag_map": tag_map, "exceptions": exceptions}
|
||||
data = {"tag_map": self.tag_map, "exceptions": exceptions}
|
||||
return srsly.msgpack_dumps(data)
|
||||
|
||||
def from_bytes(self, byte_string):
|
||||
|
@@ -898,7 +893,7 @@ FEATURES = [
|
|||
"Aspect_mod",
|
||||
"Aspect_none",
|
||||
"Aspect_perf",
|
||||
"Aspect_prof",
|
||||
"Aspect_prog",
|
||||
"Aspect_prosp",
|
||||
"Case_abe",
|
||||
"Case_abl",
|
||||
|
|
Loading…
Reference in New Issue
Block a user