Remove intify_attrs(_do_deprecated) (#11319)

2025-09-19 10:32:40 +03:00 · 2022-08-17 12:13:54 +02:00 · 2022-08-17 12:13:54 +02:00 · d757dec5c4
commit d757dec5c4
parent 551e73ccfc
4 changed files with 4 additions and 82 deletions
--- a/spacy/attrs.pyx
+++ b/spacy/attrs.pyx
@@ -97,7 +97,7 @@ NAMES = [key for key, value in sorted(IDS.items(), key=lambda item: item[1])]
 locals().update(IDS)
-def intify_attrs(stringy_attrs, strings_map=None, _do_deprecated=False):
+def intify_attrs(stringy_attrs, strings_map=None):
    """
    Normalize a dictionary of attributes, converting them to ints.
@@ -109,75 +109,6 @@ def intify_attrs(stringy_attrs, strings_map=None, _do_deprecated=False):
        converted to ints.
    """
    inty_attrs = {}
-    if _do_deprecated:
-        if "F" in stringy_attrs:
-            stringy_attrs["ORTH"] = stringy_attrs.pop("F")
-        if "L" in stringy_attrs:
-            stringy_attrs["LEMMA"] = stringy_attrs.pop("L")
-        if "pos" in stringy_attrs:
-            stringy_attrs["TAG"] = stringy_attrs.pop("pos")
-        if "morph" in stringy_attrs:
-            morphs = stringy_attrs.pop("morph")
-        if "number" in stringy_attrs:
-            stringy_attrs.pop("number")
-        if "tenspect" in stringy_attrs:
-            stringy_attrs.pop("tenspect")
-        morph_keys = [
-            "PunctType",
-            "PunctSide",
-            "Other",
-            "Degree",
-            "AdvType",
-            "Number",
-            "VerbForm",
-            "PronType",
-            "Aspect",
-            "Tense",
-            "PartType",
-            "Poss",
-            "Hyph",
-            "ConjType",
-            "NumType",
-            "Foreign",
-            "VerbType",
-            "NounType",
-            "Gender",
-            "Mood",
-            "Negative",
-            "Tense",
-            "Voice",
-            "Abbr",
-            "Derivation",
-            "Echo",
-            "Foreign",
-            "NameType",
-            "NounType",
-            "NumForm",
-            "NumValue",
-            "PartType",
-            "Polite",
-            "StyleVariant",
-            "PronType",
-            "AdjType",
-            "Person",
-            "Variant",
-            "AdpType",
-            "Reflex",
-            "Negative",
-            "Mood",
-            "Aspect",
-            "Case",
-            "Polarity",
-            "PrepCase",
-            "Animacy",  # U20
-        ]
-        for key in morph_keys:
-            if key in stringy_attrs:
-                stringy_attrs.pop(key)
-            elif key.lower() in stringy_attrs:
-                stringy_attrs.pop(key.lower())
-            elif key.upper() in stringy_attrs:
-                stringy_attrs.pop(key.upper())
    for name, value in stringy_attrs.items():
        int_key = intify_attr(name)
        if int_key is not None:
--- a/spacy/tests/lang/test_attrs.py
+++ b/spacy/tests/lang/test_attrs.py
@@ -26,14 +26,6 @@ def test_attrs_idempotence(text):
    assert intify_attrs(int_attrs) == {LEMMA: 10, IS_ALPHA: True}
-@pytest.mark.parametrize("text", ["dog"])
-def test_attrs_do_deprecated(text):
-    int_attrs = intify_attrs(
-        {"F": text, "is_alpha": True}, strings_map={text: 10}, _do_deprecated=True
-    )
-    assert int_attrs == {ORTH: 10, IS_ALPHA: True}
 def test_attrs_ent_iob_intify():
    int_attrs = intify_attrs({"ENT_IOB": ""})
    assert int_attrs == {ENT_IOB: 0}
--- a/spacy/tokenizer.pyx
+++ b/spacy/tokenizer.pyx
@@ -582,7 +582,7 @@ cdef class Tokenizer:
        substrings (iterable): A sequence of dicts, where each dict describes
            a token and its attributes.
        """
-        attrs = [intify_attrs(spec, _do_deprecated=True) for spec in substrings]
+        attrs = [intify_attrs(spec) for spec in substrings]
        orth = "".join([spec[ORTH] for spec in attrs])
        if chunk != orth:
            raise ValueError(Errors.E997.format(chunk=chunk, orth=orth, token_attrs=substrings))
@@ -650,7 +650,7 @@ cdef class Tokenizer:
            url_match = re.compile("a^").match
        special_cases = {}
        for orth, special_tokens in self.rules.items():
-            special_cases[orth] = [intify_attrs(special_token, strings_map=self.vocab.strings, _do_deprecated=True) for special_token in special_tokens]
+            special_cases[orth] = [intify_attrs(special_token, strings_map=self.vocab.strings) for special_token in special_tokens]
        tokens = []
        for substring in text.split():
            suffixes = []
--- a/spacy/vocab.pyx
+++ b/spacy/vocab.pyx
@@ -268,8 +268,7 @@ cdef class Vocab:
        cdef int i
        tokens = <TokenC*>self.mem.alloc(len(substrings) + 1, sizeof(TokenC))
        for i, props in enumerate(substrings):
-            props = intify_attrs(props, strings_map=self.strings,
+            props = intify_attrs(props, strings_map=self.strings)
-                                 _do_deprecated=True)
            token = &tokens[i]
            # Set the special tokens up to have arbitrary attributes
            lex = <LexemeC*>self.get_by_orth(self.mem, props[ORTH])