Mirror of https://github.com/explosion/spaCy.git
Remove unused mem from function args
Two methods that call `_new_lexeme`, namely `get` and `get_by_orth`, took `mem` arguments only to pass them through to the internal method. That's no longer necessary, so this cleans it up.
parent 51eb625cec
commit ea81e747f3
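In short: the internal `Vocab` lookup methods lose their `Pool` parameter. A minimal before/after sketch (signatures copied from the diff below; the `vocab` call site is illustrative):

```python
# Before: callers passed a cymem Pool through just to reach the internal method.
#   cdef const LexemeC* get(self, Pool mem, str string) except NULL
lexeme = vocab.get(vocab.mem, "hello")

# After: the Pool argument is gone; callers no longer manage lexeme memory.
#   cdef const LexemeC* get(self, str string) except NULL
lexeme = vocab.get("hello")
```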
spacy/lexeme.pyx
@@ -41,7 +41,7 @@ cdef class Lexeme:
         """
         self.vocab = vocab
         self.orth = orth
-        self.c = <LexemeC*><void*>vocab.get_by_orth(vocab.mem, orth)
+        self.c = <LexemeC*><void*>vocab.get_by_orth(orth)
         if self.c.orth != orth:
             raise ValueError(Errors.E071.format(orth=orth, vocab_orth=self.c.orth))

spacy/tokenizer.pyx
@@ -419,7 +419,7 @@ cdef class Tokenizer:
                 minus_pre = string[pre_len:]
                 if minus_pre and with_special_cases and self._specials.get(hash_string(minus_pre)) != NULL:
                     string = minus_pre
-                    prefixes.push_back(self.vocab.get(mem, prefix))
+                    prefixes.push_back(self.vocab.get(prefix))
                     break
             suf_len = self.find_suffix(string[pre_len:])
             if suf_len != 0:
@@ -427,18 +427,18 @@ cdef class Tokenizer:
                 minus_suf = string[:-suf_len]
                 if minus_suf and with_special_cases and self._specials.get(hash_string(minus_suf)) != NULL:
                     string = minus_suf
-                    suffixes.push_back(self.vocab.get(mem, suffix))
+                    suffixes.push_back(self.vocab.get(suffix))
                     break
             if pre_len and suf_len and (pre_len + suf_len) <= len(string):
                 string = string[pre_len:-suf_len]
-                prefixes.push_back(self.vocab.get(mem, prefix))
-                suffixes.push_back(self.vocab.get(mem, suffix))
+                prefixes.push_back(self.vocab.get(prefix))
+                suffixes.push_back(self.vocab.get(suffix))
             elif pre_len:
                 string = minus_pre
-                prefixes.push_back(self.vocab.get(mem, prefix))
+                prefixes.push_back(self.vocab.get(prefix))
             elif suf_len:
                 string = minus_suf
-                suffixes.push_back(self.vocab.get(mem, suffix))
+                suffixes.push_back(self.vocab.get(suffix))
         return string

     cdef int _attach_tokens(self, Doc tokens, str string,
@@ -465,11 +465,11 @@ cdef class Tokenizer:
                 # We're always saying 'no' to spaces here -- the caller will
                 # fix up the outermost one, with reference to the original.
                 # See Issue #859
-                tokens.push_back(self.vocab.get(tokens.mem, string), False)
+                tokens.push_back(self.vocab.get(string), False)
             else:
                 matches = self.find_infix(string)
                 if not matches:
-                    tokens.push_back(self.vocab.get(tokens.mem, string), False)
+                    tokens.push_back(self.vocab.get(string), False)
                 else:
                     # Let's say we have dyn-o-mite-dave - the regex finds the
                     # start and end positions of the hyphens
@@ -484,7 +484,7 @@ cdef class Tokenizer:

                     if infix_start != start:
                         span = string[start:infix_start]
-                        tokens.push_back(self.vocab.get(tokens.mem, span), False)
+                        tokens.push_back(self.vocab.get(span), False)

                     if infix_start != infix_end:
                         # If infix_start != infix_end, it means the infix
@@ -492,11 +492,11 @@ cdef class Tokenizer:
                         # for tokenization in some languages (see
                         # https://github.com/explosion/spaCy/issues/768)
                         infix_span = string[infix_start:infix_end]
-                        tokens.push_back(self.vocab.get(tokens.mem, infix_span), False)
+                        tokens.push_back(self.vocab.get(infix_span), False)
                     start = infix_end
             span = string[start:]
             if span:
-                tokens.push_back(self.vocab.get(tokens.mem, span), False)
+                tokens.push_back(self.vocab.get(span), False)
         cdef vector[const LexemeC*].reverse_iterator it = suffixes.rbegin()
         while it != suffixes.rend():
             lexeme = deref(it)

spacy/tokens/doc.pyx
@@ -266,12 +266,12 @@ cdef class Doc:
         cdef const LexemeC* lexeme
         for word, has_space in zip(words, spaces):
             if isinstance(word, str):
-                lexeme = self.vocab.get(self.mem, word)
+                lexeme = self.vocab.get(word)
             elif isinstance(word, bytes):
                 raise ValueError(Errors.E028.format(value=word))
             else:
                 try:
-                    lexeme = self.vocab.get_by_orth(self.mem, word)
+                    lexeme = self.vocab.get_by_orth(word)
                 except TypeError:
                     raise TypeError(Errors.E1022.format(wtype=type(word)))
             self.push_back(lexeme, has_space)
@@ -1430,7 +1430,7 @@ cdef class Doc:
             end = start + attrs[i, 0]
             has_space = attrs[i, 1]
             orth_ = text[start:end]
-            lex = self.vocab.get(self.mem, orth_)
+            lex = self.vocab.get(orth_)
             self.push_back(lex, has_space)
             start = end + has_space
         self.from_array(msg["array_head"][2:], attrs[:, 2:])
@@ -1536,7 +1536,7 @@ cdef class Doc:
         assert words == reconstructed_words

         for word, has_space in zip(words, spaces):
-            lex = self.vocab.get(self.mem, word)
+            lex = self.vocab.get(word)
             self.push_back(lex, has_space)

         # Set remaining token-level attributes via Doc.from_array().

spacy/tokens/_retokenize.pyx
@@ -223,7 +223,7 @@ def _merge(Doc doc, merges):
         if doc.vocab.vectors_length > 0:
             doc.vocab.set_vector(new_orth, span.vector)
         token = tokens[token_index]
-        lex = doc.vocab.get(doc.mem, new_orth)
+        lex = doc.vocab.get(new_orth)
         token.lex = lex
         # We set trailing space here too
         token.spacy = doc.c[spans[token_index].end-1].spacy
@@ -359,7 +359,7 @@ def _split(Doc doc, int token_index, orths, heads, attrs):
     cdef int idx_offset = 0
     for i, orth in enumerate(orths):
         token = &doc.c[token_index + i]
-        lex = doc.vocab.get(doc.mem, orth)
+        lex = doc.vocab.get(orth)
         token.lex = lex
         # If lemma is currently set, set default lemma to orth
         if token.lemma != 0:

spacy/vocab.pxd
@@ -35,8 +35,8 @@ cdef class Vocab:
     cdef public object lex_attr_getters
     cdef public object cfg

-    cdef const LexemeC* get(self, Pool mem, str string) except NULL
-    cdef const LexemeC* get_by_orth(self, Pool mem, attr_t orth) except NULL
+    cdef const LexemeC* get(self, str string) except NULL
+    cdef const LexemeC* get_by_orth(self, attr_t orth) except NULL
     cdef const TokenC* make_fused_token(self, substrings) except NULL

     cdef const LexemeC* _new_lexeme(self, str string) except NULL

spacy/vocab.pyx
@@ -139,7 +139,7 @@ cdef class Vocab:
         self.lex_attr_getters[flag_id] = flag_getter
         return flag_id

-    cdef const LexemeC* get(self, Pool mem, str string) except NULL:
+    cdef const LexemeC* get(self, str string) except NULL:
         """Get a pointer to a `LexemeC` from the lexicon, creating a new
         `Lexeme` if necessary using memory acquired from the given pool. If the
         pool is the lexicon's own memory, the lexeme is saved in the lexicon.
@@ -159,7 +159,7 @@ cdef class Vocab:
         else:
             return self._new_lexeme(string)

-    cdef const LexemeC* get_by_orth(self, Pool mem, attr_t orth) except NULL:
+    cdef const LexemeC* get_by_orth(self, attr_t orth) except NULL:
         """Get a pointer to a `LexemeC` from the lexicon, creating a new
         `Lexeme` if necessary using memory acquired from the given pool. If the
         pool is the lexicon's own memory, the lexeme is saved in the lexicon.
@@ -259,7 +259,7 @@ cdef class Vocab:
             props = intify_attrs(props, strings_map=self.strings)
             token = &tokens[i]
             # Set the special tokens up to have arbitrary attributes
-            lex = <LexemeC*>self.get_by_orth(self.mem, props[ORTH])
+            lex = <LexemeC*>self.get_by_orth(props[ORTH])
             token.lex = lex
             for attr_id, value in props.items():
                 Token.set_struct_attr(token, attr_id, value)

website/docs/api/cython-classes.mdx
@@ -163,12 +163,11 @@ vocabulary.
 > #### Example
 >
 > ```python
-> lexeme = vocab.get(vocab.mem, "hello")
+> lexeme = vocab.get("hello")
 > ```

 | Name        | Description |
 | ----------- | ---------------------------------------------------------------------------------------------------------- |
-| `mem`       | A memory pool. Allocated memory will be freed once the `Vocab` object is garbage collected. ~~cymem.Pool~~ |
 | `string`    | The string of the word to look up. ~~str~~ |
 | **RETURNS** | The lexeme in the vocabulary. ~~const LexemeC\*~~ |

@@ -185,7 +184,6 @@ vocabulary.

 | Name        | Description |
 | ----------- | ---------------------------------------------------------------------------------------------------------- |
-| `mem`       | A memory pool. Allocated memory will be freed once the `Vocab` object is garbage collected. ~~cymem.Pool~~ |
 | `orth`      | ID of the verbatim text content. ~~attr_t (uint64_t)~~ |
 | **RETURNS** | The lexeme in the vocabulary. ~~const LexemeC\*~~ |
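`get` and `get_by_orth` are `cdef` methods, so the change only affects Cython callers. As a hedged sketch of a caller after this commit, a hypothetical external Cython module compiled against spaCy's `.pxd` declarations might look like this (the `lookup` helper is invented for illustration):

```python
# Hypothetical Cython module (.pyx), assuming it is built against spaCy's .pxd files.
from spacy.structs cimport LexemeC
from spacy.vocab cimport Vocab

cdef const LexemeC* lookup(Vocab vocab, str word):
    # After this commit: no Pool argument; the vocab allocates internally.
    return vocab.get(word)
```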