mirror of
https://github.com/explosion/spaCy.git
synced 2025-02-04 21:50:35 +03:00
* Remove Tokens.extend method
This commit is contained in:
parent
495e1c7366
commit
accdbe989b
|
@ -87,7 +87,7 @@ cdef class Language:
|
||||||
cdef int idx = 0
|
cdef int idx = 0
|
||||||
for i, py_string in enumerate(strings):
|
for i, py_string in enumerate(strings):
|
||||||
slice_unicode(&string_struct, py_string, 0, len(py_string))
|
slice_unicode(&string_struct, py_string, 0, len(py_string))
|
||||||
tokens.push_back(idx, self.lexicon.get(tokens.mem, &string_struct))
|
tokens.push_back(idx, <const Lexeme*>self.lexicon.get(tokens.mem, &string_struct))
|
||||||
idx += len(py_string) + 1
|
idx += len(py_string) + 1
|
||||||
return tokens
|
return tokens
|
||||||
|
|
||||||
|
@ -136,23 +136,19 @@ cdef class Language:
|
||||||
return tokens
|
return tokens
|
||||||
|
|
||||||
cdef int _try_cache(self, int idx, hash_t key, Tokens tokens) except -1:
|
cdef int _try_cache(self, int idx, hash_t key, Tokens tokens) except -1:
|
||||||
cdef int i
|
|
||||||
cdef TokenC* token
|
|
||||||
cached = <Cached*>self._specials.get(key)
|
cached = <Cached*>self._specials.get(key)
|
||||||
if cached != NULL:
|
if cached == NULL:
|
||||||
assert not cached.is_lex
|
|
||||||
for i in range(cached.length):
|
|
||||||
token = &cached.data.tokens[i]
|
|
||||||
idx = tokens.push_back(idx, token)
|
|
||||||
return True
|
|
||||||
else:
|
|
||||||
cached = <Cached*>self._cache.get(key)
|
cached = <Cached*>self._cache.get(key)
|
||||||
if cached != NULL:
|
if cached == NULL:
|
||||||
assert cached.is_lex == True
|
|
||||||
tokens.extend(i, cached.data.lexemes, cached.length)
|
|
||||||
return True
|
|
||||||
else:
|
|
||||||
return False
|
return False
|
||||||
|
cdef int i
|
||||||
|
if cached.is_lex:
|
||||||
|
for i in range(cached.length):
|
||||||
|
idx = tokens.push_back(idx, cached.data.lexemes[i])
|
||||||
|
else:
|
||||||
|
for i in range(cached.length):
|
||||||
|
idx = tokens.push_back(idx, &cached.data.tokens[i])
|
||||||
|
return True
|
||||||
|
|
||||||
cdef int _tokenize(self, Tokens tokens, UniStr* span, int start, int end) except -1:
|
cdef int _tokenize(self, Tokens tokens, UniStr* span, int start, int end) except -1:
|
||||||
cdef vector[Lexeme*] prefixes
|
cdef vector[Lexeme*] prefixes
|
||||||
|
@ -215,8 +211,10 @@ cdef class Language:
|
||||||
cdef const Lexeme* const* lexemes
|
cdef const Lexeme* const* lexemes
|
||||||
cdef Lexeme* lexeme
|
cdef Lexeme* lexeme
|
||||||
cdef UniStr span
|
cdef UniStr span
|
||||||
|
cdef int i
|
||||||
if prefixes.size():
|
if prefixes.size():
|
||||||
idx = tokens.extend(idx, prefixes.data(), prefixes.size())
|
for i in range(prefixes.size()):
|
||||||
|
idx = tokens.push_back(idx, prefixes[0][i])
|
||||||
if string.n != 0:
|
if string.n != 0:
|
||||||
cache_hit = self._try_cache(idx, string.key, tokens)
|
cache_hit = self._try_cache(idx, string.key, tokens)
|
||||||
if cache_hit:
|
if cache_hit:
|
||||||
|
|
|
@ -47,7 +47,6 @@ cdef class Tokens:
|
||||||
cdef int length
|
cdef int length
|
||||||
cdef int max_length
|
cdef int max_length
|
||||||
|
|
||||||
cdef int extend(self, int i, const Lexeme* const* lexemes, int n) except -1
|
|
||||||
cdef int push_back(self, int i, LexemeOrToken lex_or_tok) except -1
|
cdef int push_back(self, int i, LexemeOrToken lex_or_tok) except -1
|
||||||
cpdef int set_tag(self, int i, int tag_type, int tag) except -1
|
cpdef int set_tag(self, int i, int tag_type, int tag) except -1
|
||||||
|
|
||||||
|
|
|
@ -71,20 +71,6 @@ cdef class Tokens:
|
||||||
self.length += 1
|
self.length += 1
|
||||||
return idx + t.lex.length
|
return idx + t.lex.length
|
||||||
|
|
||||||
cdef int extend(self, int idx, const Lexeme* const* lexemes, int n) except -1:
|
|
||||||
cdef int i
|
|
||||||
if lexemes == NULL:
|
|
||||||
return idx
|
|
||||||
elif n == 0:
|
|
||||||
i = 0
|
|
||||||
while lexemes[i] != NULL:
|
|
||||||
idx = self.push_back(idx, lexemes[i])
|
|
||||||
i += 1
|
|
||||||
else:
|
|
||||||
for i in range(n):
|
|
||||||
idx = self.push_back(idx, lexemes[i])
|
|
||||||
return idx
|
|
||||||
|
|
||||||
cpdef int set_tag(self, int i, int tag_type, int tag) except -1:
|
cpdef int set_tag(self, int i, int tag_type, int tag) except -1:
|
||||||
self.data[i].pos = tag
|
self.data[i].pos = tag
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user