Remove unused mem from function args

Two methods that call _new_lexeme, get and get_by_orth, took mem arguments
just to pass them on to that internal method. Since _new_lexeme no longer
takes a memory pool, the arguments were unused, so this cleans them up.
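
From a caller's perspective the change is just dropping the pool argument. A minimal before/after sketch (these are cdef methods, so they are only callable from Cython; `vocab` stands in for any Vocab instance and `orth` for an attr_t hash such as vocab.strings["hello"]):

    cdef const LexemeC* lex

    # Before this commit: a Pool had to be passed in, only to be forwarded.
    lex = vocab.get(vocab.mem, "hello")
    lex = vocab.get_by_orth(vocab.mem, orth)

    # After: the pool argument is gone; the vocab manages its own memory.
    lex = vocab.get("hello")
    lex = vocab.get_by_orth(orth)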
Paul O'Leary McCann 2023-01-23 13:12:51 +09:00
parent 51eb625cec
commit ea81e747f3
7 changed files with 24 additions and 26 deletions

View File

@@ -41,7 +41,7 @@ cdef class Lexeme:
         """
         self.vocab = vocab
         self.orth = orth
-        self.c = <LexemeC*><void*>vocab.get_by_orth(vocab.mem, orth)
+        self.c = <LexemeC*><void*>vocab.get_by_orth(orth)
         if self.c.orth != orth:
             raise ValueError(Errors.E071.format(orth=orth, vocab_orth=self.c.orth))

View File

@@ -419,7 +419,7 @@ cdef class Tokenizer:
                 minus_pre = string[pre_len:]
                 if minus_pre and with_special_cases and self._specials.get(hash_string(minus_pre)) != NULL:
                     string = minus_pre
-                    prefixes.push_back(self.vocab.get(mem, prefix))
+                    prefixes.push_back(self.vocab.get(prefix))
                     break
             suf_len = self.find_suffix(string[pre_len:])
             if suf_len != 0:
@@ -427,18 +427,18 @@ cdef class Tokenizer:
                 minus_suf = string[:-suf_len]
                 if minus_suf and with_special_cases and self._specials.get(hash_string(minus_suf)) != NULL:
                     string = minus_suf
-                    suffixes.push_back(self.vocab.get(mem, suffix))
+                    suffixes.push_back(self.vocab.get(suffix))
                     break
             if pre_len and suf_len and (pre_len + suf_len) <= len(string):
                 string = string[pre_len:-suf_len]
-                prefixes.push_back(self.vocab.get(mem, prefix))
-                suffixes.push_back(self.vocab.get(mem, suffix))
+                prefixes.push_back(self.vocab.get(prefix))
+                suffixes.push_back(self.vocab.get(suffix))
             elif pre_len:
                 string = minus_pre
-                prefixes.push_back(self.vocab.get(mem, prefix))
+                prefixes.push_back(self.vocab.get(prefix))
             elif suf_len:
                 string = minus_suf
-                suffixes.push_back(self.vocab.get(mem, suffix))
+                suffixes.push_back(self.vocab.get(suffix))
         return string

     cdef int _attach_tokens(self, Doc tokens, str string,
@@ -465,11 +465,11 @@ cdef class Tokenizer:
                 # We're always saying 'no' to spaces here -- the caller will
                 # fix up the outermost one, with reference to the original.
                 # See Issue #859
-                tokens.push_back(self.vocab.get(tokens.mem, string), False)
+                tokens.push_back(self.vocab.get(string), False)
             else:
                 matches = self.find_infix(string)
                 if not matches:
-                    tokens.push_back(self.vocab.get(tokens.mem, string), False)
+                    tokens.push_back(self.vocab.get(string), False)
                 else:
                     # Let's say we have dyn-o-mite-dave - the regex finds the
                     # start and end positions of the hyphens
@@ -484,7 +484,7 @@ cdef class Tokenizer:
                         if infix_start != start:
                             span = string[start:infix_start]
-                            tokens.push_back(self.vocab.get(tokens.mem, span), False)
+                            tokens.push_back(self.vocab.get(span), False)

                         if infix_start != infix_end:
                             # If infix_start != infix_end, it means the infix
@@ -492,11 +492,11 @@ cdef class Tokenizer:
                             # for tokenization in some languages (see
                             # https://github.com/explosion/spaCy/issues/768)
                             infix_span = string[infix_start:infix_end]
-                            tokens.push_back(self.vocab.get(tokens.mem, infix_span), False)
+                            tokens.push_back(self.vocab.get(infix_span), False)
                             start = infix_end
                     span = string[start:]
                     if span:
-                        tokens.push_back(self.vocab.get(tokens.mem, span), False)
+                        tokens.push_back(self.vocab.get(span), False)
         cdef vector[const LexemeC*].reverse_iterator it = suffixes.rbegin()
         while it != suffixes.rend():
             lexeme = deref(it)

View File

@@ -266,12 +266,12 @@ cdef class Doc:
         cdef const LexemeC* lexeme
         for word, has_space in zip(words, spaces):
             if isinstance(word, str):
-                lexeme = self.vocab.get(self.mem, word)
+                lexeme = self.vocab.get(word)
             elif isinstance(word, bytes):
                 raise ValueError(Errors.E028.format(value=word))
             else:
                 try:
-                    lexeme = self.vocab.get_by_orth(self.mem, word)
+                    lexeme = self.vocab.get_by_orth(word)
                 except TypeError:
                     raise TypeError(Errors.E1022.format(wtype=type(word)))
             self.push_back(lexeme, has_space)
@@ -1430,7 +1430,7 @@ cdef class Doc:
                 end = start + attrs[i, 0]
                 has_space = attrs[i, 1]
                 orth_ = text[start:end]
-                lex = self.vocab.get(self.mem, orth_)
+                lex = self.vocab.get(orth_)
                 self.push_back(lex, has_space)
                 start = end + has_space
         self.from_array(msg["array_head"][2:], attrs[:, 2:])
@@ -1536,7 +1536,7 @@ cdef class Doc:
         assert words == reconstructed_words

         for word, has_space in zip(words, spaces):
-            lex = self.vocab.get(self.mem, word)
+            lex = self.vocab.get(word)
             self.push_back(lex, has_space)

         # Set remaining token-level attributes via Doc.from_array().

View File

@@ -223,7 +223,7 @@ def _merge(Doc doc, merges):
         if doc.vocab.vectors_length > 0:
             doc.vocab.set_vector(new_orth, span.vector)
         token = tokens[token_index]
-        lex = doc.vocab.get(doc.mem, new_orth)
+        lex = doc.vocab.get(new_orth)
         token.lex = lex
         # We set trailing space here too
         token.spacy = doc.c[spans[token_index].end-1].spacy
@@ -359,7 +359,7 @@ def _split(Doc doc, int token_index, orths, heads, attrs):
     cdef int idx_offset = 0
     for i, orth in enumerate(orths):
         token = &doc.c[token_index + i]
-        lex = doc.vocab.get(doc.mem, orth)
+        lex = doc.vocab.get(orth)
         token.lex = lex
         # If lemma is currently set, set default lemma to orth
         if token.lemma != 0:

View File

@@ -35,8 +35,8 @@ cdef class Vocab:
     cdef public object lex_attr_getters
     cdef public object cfg

-    cdef const LexemeC* get(self, Pool mem, str string) except NULL
-    cdef const LexemeC* get_by_orth(self, Pool mem, attr_t orth) except NULL
+    cdef const LexemeC* get(self, str string) except NULL
+    cdef const LexemeC* get_by_orth(self, attr_t orth) except NULL
     cdef const TokenC* make_fused_token(self, substrings) except NULL

     cdef const LexemeC* _new_lexeme(self, str string) except NULL

View File

@@ -139,7 +139,7 @@ cdef class Vocab:
             self.lex_attr_getters[flag_id] = flag_getter
         return flag_id

-    cdef const LexemeC* get(self, Pool mem, str string) except NULL:
+    cdef const LexemeC* get(self, str string) except NULL:
         """Get a pointer to a `LexemeC` from the lexicon, creating a new
         `Lexeme` if necessary using memory acquired from the given pool. If the
         pool is the lexicon's own memory, the lexeme is saved in the lexicon.
@@ -159,7 +159,7 @@ cdef class Vocab:
         else:
             return self._new_lexeme(string)

-    cdef const LexemeC* get_by_orth(self, Pool mem, attr_t orth) except NULL:
+    cdef const LexemeC* get_by_orth(self, attr_t orth) except NULL:
         """Get a pointer to a `LexemeC` from the lexicon, creating a new
         `Lexeme` if necessary using memory acquired from the given pool. If the
         pool is the lexicon's own memory, the lexeme is saved in the lexicon.
@@ -259,7 +259,7 @@ cdef class Vocab:
             props = intify_attrs(props, strings_map=self.strings)
             token = &tokens[i]
             # Set the special tokens up to have arbitrary attributes
-            lex = <LexemeC*>self.get_by_orth(self.mem, props[ORTH])
+            lex = <LexemeC*>self.get_by_orth(props[ORTH])
             token.lex = lex
             for attr_id, value in props.items():
                 Token.set_struct_attr(token, attr_id, value)

View File

@@ -163,12 +163,11 @@ vocabulary.
 > #### Example
 >
 > ```python
-> lexeme = vocab.get(vocab.mem, "hello")
+> lexeme = vocab.get("hello")
 > ```

 | Name        | Description |
 | ----------- | ----------- |
-| `mem`       | A memory pool. Allocated memory will be freed once the `Vocab` object is garbage collected. ~~cymem.Pool~~ |
 | `string`    | The string of the word to look up. ~~str~~ |
 | **RETURNS** | The lexeme in the vocabulary. ~~const LexemeC\*~~ |
@@ -185,7 +184,6 @@ vocabulary.
 | Name        | Description |
 | ----------- | ----------- |
-| `mem`       | A memory pool. Allocated memory will be freed once the `Vocab` object is garbage collected. ~~cymem.Pool~~ |
 | `orth`      | ID of the verbatim text content. ~~attr_t (uint64_t)~~ |
 | **RETURNS** | The lexeme in the vocabulary. ~~const LexemeC\*~~ |
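
For symmetry with the get example above, a usage sketch for get_by_orth in the same docs style (illustrative only, not part of this diff; the orth hash comes from vocab.strings, the standard string store lookup):

 > #### Example
 >
 > ```python
 > lexeme = vocab.get_by_orth(vocab.strings["hello"])
 > ```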