From 02babf931793f4e2d372a6c89ef0ba3df9573140 Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Fri, 30 Aug 2019 11:29:19 +0200 Subject: [PATCH 1/2] English tag map without unsupported features/values --- spacy/lang/en/tag_map.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/spacy/lang/en/tag_map.py b/spacy/lang/en/tag_map.py index 5c9a97786..9bd884a3a 100644 --- a/spacy/lang/en/tag_map.py +++ b/spacy/lang/en/tag_map.py @@ -14,8 +14,8 @@ TAG_MAP = { '""': {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"}, "''": {POS: PUNCT, "PunctType": "quot", "PunctSide": "fin"}, ":": {POS: PUNCT}, - "$": {POS: SYM, "Other": {"SymType": "currency"}}, - "#": {POS: SYM, "Other": {"SymType": "numbersign"}}, + "$": {POS: SYM}, + "#": {POS: SYM}, "AFX": {POS: ADJ, "Hyph": "yes"}, "CC": {POS: CCONJ, "ConjType": "comp"}, "CD": {POS: NUM, "NumType": "card"}, @@ -34,7 +34,7 @@ TAG_MAP = { "NNP": {POS: PROPN, "NounType": "prop", "Number": "sing"}, "NNPS": {POS: PROPN, "NounType": "prop", "Number": "plur"}, "NNS": {POS: NOUN, "Number": "plur"}, - "PDT": {POS: DET, "AdjType": "pdt", "PronType": "prn"}, + "PDT": {POS: DET}, "POS": {POS: PART, "Poss": "yes"}, "PRP": {POS: PRON, "PronType": "prs"}, "PRP$": {POS: PRON, "PronType": "prs", "Poss": "yes"}, @@ -58,10 +58,10 @@ TAG_MAP = { "Number": "sing", "Person": "three", }, - "WDT": {POS: PRON, "PronType": "int|rel"}, - "WP": {POS: PRON, "PronType": "int|rel"}, - "WP$": {POS: PRON, "Poss": "yes", "PronType": "int|rel"}, - "WRB": {POS: ADV, "PronType": "int|rel"}, + "WDT": {POS: PRON}, + "WP": {POS: PRON}, + "WP$": {POS: PRON, "Poss": "yes"}, + "WRB": {POS: ADV}, "ADD": {POS: X}, "NFP": {POS: PUNCT}, "GW": {POS: X}, From 893f11a9e38d4a29c608602e61798a29d4800d99 Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Fri, 30 Aug 2019 11:30:03 +0200 Subject: [PATCH 2/2] Serialize tag_map directly Fix Aspect_prof typo --- spacy/morphology.pyx | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/spacy/morphology.pyx b/spacy/morphology.pyx index a7a1bee57..f706fec7f 100644 --- a/spacy/morphology.pyx +++ b/spacy/morphology.pyx @@ -327,16 +327,11 @@ cdef class Morphology: self.add_special_case(tag_str, form_str, attrs) def to_bytes(self, exclude=tuple(), **kwargs): - tag_map = {} - for key in self.tags: - tag_ptr = self.tags.get(key) - if tag_ptr != NULL: - tag_map[key] = tag_to_json(tag_ptr) exceptions = {} for (tag_str, orth_int), attrs in sorted(self.exc.items()): exceptions.setdefault(tag_str, {}) exceptions[tag_str][self.strings[orth_int]] = attrs - data = {"tag_map": tag_map, "exceptions": exceptions} + data = {"tag_map": self.tag_map, "exceptions": exceptions} return srsly.msgpack_dumps(data) def from_bytes(self, byte_string): @@ -898,7 +893,7 @@ FEATURES = [ "Aspect_mod", "Aspect_none", "Aspect_perf", - "Aspect_prof", + "Aspect_prog", "Aspect_prosp", "Case_abe", "Case_abl",