Mirror of https://github.com/explosion/spaCy.git, synced 2024-11-11 04:08:09 +03:00.
Commit accdbe989b (parent 495e1c7366): Remove Tokens.extend method.
|
@ -87,7 +87,7 @@ cdef class Language:
|
|||
cdef int idx = 0
|
||||
for i, py_string in enumerate(strings):
|
||||
slice_unicode(&string_struct, py_string, 0, len(py_string))
|
||||
tokens.push_back(idx, self.lexicon.get(tokens.mem, &string_struct))
|
||||
tokens.push_back(idx, <const Lexeme*>self.lexicon.get(tokens.mem, &string_struct))
|
||||
idx += len(py_string) + 1
|
||||
return tokens
|
||||
|
||||
|
@ -136,23 +136,19 @@ cdef class Language:
|
|||
return tokens
|
||||
|
||||
cdef int _try_cache(self, int idx, hash_t key, Tokens tokens) except -1:
    # Look up `key` first in the specials table, then in the general cache,
    # and copy the cached analysis into `tokens` via push_back, threading the
    # character offset `idx` through the calls.
    # Returns True on a cache hit, False on a miss.
    #
    # NOTE(review): the scraped span interleaved the pre- and post-change
    # versions of this function without diff markers: it contained both
    # `if cached != NULL:` and `if cached == NULL:` branches back to back,
    # and the pre-change path called the removed Tokens.extend with the
    # uninitialized loop variable `i` instead of the offset `idx`.
    # This body is the coherent post-change version.
    cached = <Cached*>self._specials.get(key)
    if cached == NULL:
        cached = <Cached*>self._cache.get(key)
        if cached == NULL:
            return False
    cdef int i
    if cached.is_lex:
        # Cached entry stores an array of lexeme pointers.
        for i in range(cached.length):
            idx = tokens.push_back(idx, cached.data.lexemes[i])
    else:
        # Cached entry stores fully constructed tokens.
        for i in range(cached.length):
            idx = tokens.push_back(idx, &cached.data.tokens[i])
    return True
|
||||
|
||||
cdef int _tokenize(self, Tokens tokens, UniStr* span, int start, int end) except -1:
|
||||
cdef vector[Lexeme*] prefixes
|
||||
|
@ -215,8 +211,10 @@ cdef class Language:
|
|||
cdef const Lexeme* const* lexemes
|
||||
cdef Lexeme* lexeme
|
||||
cdef UniStr span
|
||||
cdef int i
|
||||
if prefixes.size():
|
||||
idx = tokens.extend(idx, prefixes.data(), prefixes.size())
|
||||
for i in range(prefixes.size()):
|
||||
idx = tokens.push_back(idx, prefixes[0][i])
|
||||
if string.n != 0:
|
||||
cache_hit = self._try_cache(idx, string.key, tokens)
|
||||
if cache_hit:
|
||||
|
|
|
@ -47,7 +47,6 @@ cdef class Tokens:
|
|||
# Number of tokens currently stored (presumably; confirm against Tokens impl).
cdef int length
# Presumably the allocated capacity of the token buffer -- TODO confirm.
cdef int max_length

# NOTE(review): this `extend` declaration belongs to the pre-change version;
# the commit removes Tokens.extend -- confirm it is absent in the final .pxd.
cdef int extend(self, int i, const Lexeme* const* lexemes, int n) except -1
# Append one lexeme-or-token at character offset `i`.
cdef int push_back(self, int i, LexemeOrToken lex_or_tok) except -1
# Set a tag value on token `i`.
cpdef int set_tag(self, int i, int tag_type, int tag) except -1
|
||||
|
||||
|
|
|
@ -71,20 +71,6 @@ cdef class Tokens:
|
|||
self.length += 1
|
||||
return idx + t.lex.length
|
||||
|
||||
cdef int extend(self, int idx, const Lexeme* const* lexemes, int n) except -1:
    # Append lexemes from `lexemes` to this Tokens object through push_back,
    # threading the character offset `idx` through each call and returning
    # the final offset.
    #
    # A NULL array is a no-op.  With n == 0 the array is treated as
    # NULL-terminated; otherwise exactly `n` entries are appended.
    cdef int j
    if lexemes == NULL:
        return idx
    if n == 0:
        j = 0
        while lexemes[j] != NULL:
            idx = self.push_back(idx, lexemes[j])
            j += 1
        return idx
    for j in range(n):
        idx = self.push_back(idx, lexemes[j])
    return idx
|
||||
|
||||
cpdef int set_tag(self, int i, int tag_type, int tag) except -1:
    # Record `tag` on token `i`.  Only `.pos` is written in the visible body
    # and `tag_type` goes unused -- the diff hunk may be truncated here;
    # NOTE(review): confirm against the full file before relying on this.
    self.data[i].pos = tag
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user