From d757dec5c4fc7618dac7a7831504f7611ff75eb4 Mon Sep 17 00:00:00 2001 From: Adriane Boyd Date: Wed, 17 Aug 2022 12:13:54 +0200 Subject: [PATCH] Remove intify_attrs(_do_deprecated) (#11319) --- spacy/attrs.pyx | 71 +--------------------------------- spacy/tests/lang/test_attrs.py | 8 ---- spacy/tokenizer.pyx | 4 +- spacy/vocab.pyx | 3 +- 4 files changed, 4 insertions(+), 82 deletions(-) diff --git a/spacy/attrs.pyx b/spacy/attrs.pyx index dc8eed7c3..7b6fd9e94 100644 --- a/spacy/attrs.pyx +++ b/spacy/attrs.pyx @@ -97,7 +97,7 @@ NAMES = [key for key, value in sorted(IDS.items(), key=lambda item: item[1])] locals().update(IDS) -def intify_attrs(stringy_attrs, strings_map=None, _do_deprecated=False): +def intify_attrs(stringy_attrs, strings_map=None): """ Normalize a dictionary of attributes, converting them to ints. @@ -109,75 +109,6 @@ def intify_attrs(stringy_attrs, strings_map=None, _do_deprecated=False): converted to ints. """ inty_attrs = {} - if _do_deprecated: - if "F" in stringy_attrs: - stringy_attrs["ORTH"] = stringy_attrs.pop("F") - if "L" in stringy_attrs: - stringy_attrs["LEMMA"] = stringy_attrs.pop("L") - if "pos" in stringy_attrs: - stringy_attrs["TAG"] = stringy_attrs.pop("pos") - if "morph" in stringy_attrs: - morphs = stringy_attrs.pop("morph") - if "number" in stringy_attrs: - stringy_attrs.pop("number") - if "tenspect" in stringy_attrs: - stringy_attrs.pop("tenspect") - morph_keys = [ - "PunctType", - "PunctSide", - "Other", - "Degree", - "AdvType", - "Number", - "VerbForm", - "PronType", - "Aspect", - "Tense", - "PartType", - "Poss", - "Hyph", - "ConjType", - "NumType", - "Foreign", - "VerbType", - "NounType", - "Gender", - "Mood", - "Negative", - "Tense", - "Voice", - "Abbr", - "Derivation", - "Echo", - "Foreign", - "NameType", - "NounType", - "NumForm", - "NumValue", - "PartType", - "Polite", - "StyleVariant", - "PronType", - "AdjType", - "Person", - "Variant", - "AdpType", - "Reflex", - "Negative", - "Mood", - "Aspect", - "Case", - "Polarity", - "PrepCase", - "Animacy", # U20 - ] - for key in morph_keys: - if key in stringy_attrs: - stringy_attrs.pop(key) - elif key.lower() in stringy_attrs: - stringy_attrs.pop(key.lower()) - elif key.upper() in stringy_attrs: - stringy_attrs.pop(key.upper()) for name, value in stringy_attrs.items(): int_key = intify_attr(name) if int_key is not None: diff --git a/spacy/tests/lang/test_attrs.py b/spacy/tests/lang/test_attrs.py index 1c27c1744..1e1bae08c 100644 --- a/spacy/tests/lang/test_attrs.py +++ b/spacy/tests/lang/test_attrs.py @@ -26,14 +26,6 @@ def test_attrs_idempotence(text): assert intify_attrs(int_attrs) == {LEMMA: 10, IS_ALPHA: True} -@pytest.mark.parametrize("text", ["dog"]) -def test_attrs_do_deprecated(text): - int_attrs = intify_attrs( - {"F": text, "is_alpha": True}, strings_map={text: 10}, _do_deprecated=True - ) - assert int_attrs == {ORTH: 10, IS_ALPHA: True} - - def test_attrs_ent_iob_intify(): int_attrs = intify_attrs({"ENT_IOB": ""}) assert int_attrs == {ENT_IOB: 0} diff --git a/spacy/tokenizer.pyx b/spacy/tokenizer.pyx index 0e75b5f7a..972633a2f 100644 --- a/spacy/tokenizer.pyx +++ b/spacy/tokenizer.pyx @@ -582,7 +582,7 @@ cdef class Tokenizer: substrings (iterable): A sequence of dicts, where each dict describes a token and its attributes. """ - attrs = [intify_attrs(spec, _do_deprecated=True) for spec in substrings] + attrs = [intify_attrs(spec) for spec in substrings] orth = "".join([spec[ORTH] for spec in attrs]) if chunk != orth: raise ValueError(Errors.E997.format(chunk=chunk, orth=orth, token_attrs=substrings)) @@ -650,7 +650,7 @@ cdef class Tokenizer: url_match = re.compile("a^").match special_cases = {} for orth, special_tokens in self.rules.items(): - special_cases[orth] = [intify_attrs(special_token, strings_map=self.vocab.strings, _do_deprecated=True) for special_token in special_tokens] + special_cases[orth] = [intify_attrs(special_token, strings_map=self.vocab.strings) for special_token in special_tokens] tokens = [] for substring in text.split(): suffixes = [] diff --git a/spacy/vocab.pyx b/spacy/vocab.pyx index 428cadd82..af7d97933 100644 --- a/spacy/vocab.pyx +++ b/spacy/vocab.pyx @@ -268,8 +268,7 @@ cdef class Vocab: cdef int i tokens = self.mem.alloc(len(substrings) + 1, sizeof(TokenC)) for i, props in enumerate(substrings): - props = intify_attrs(props, strings_map=self.strings, - _do_deprecated=True) + props = intify_attrs(props, strings_map=self.strings) token = &tokens[i] # Set the special tokens up to have arbitrary attributes lex = self.get_by_orth(self.mem, props[ORTH])