mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-10 19:57:17 +03:00
* Remove UniStr struct
This commit is contained in:
parent
2fc66e3723
commit
815bda201d
|
@ -7,9 +7,6 @@ from libc.math cimport exp as c_exp
|
||||||
from libcpp.queue cimport priority_queue
|
from libcpp.queue cimport priority_queue
|
||||||
from libcpp.pair cimport pair
|
from libcpp.pair cimport pair
|
||||||
|
|
||||||
from ..structs cimport UniStr
|
|
||||||
from ..strings cimport slice_unicode
|
|
||||||
|
|
||||||
from cymem.cymem cimport Address, Pool
|
from cymem.cymem cimport Address, Pool
|
||||||
from preshed.maps cimport PreshMap
|
from preshed.maps cimport PreshMap
|
||||||
from preshed.counter cimport PreshCounter
|
from preshed.counter cimport PreshCounter
|
||||||
|
@ -175,14 +172,13 @@ cdef class Packer:
|
||||||
cdef Doc tokens = Doc(self.vocab)
|
cdef Doc tokens = Doc(self.vocab)
|
||||||
cdef int start = 0
|
cdef int start = 0
|
||||||
cdef bint is_spacy
|
cdef bint is_spacy
|
||||||
cdef UniStr span
|
|
||||||
cdef int length = len(string)
|
cdef int length = len(string)
|
||||||
cdef int i = 0
|
cdef int i = 0
|
||||||
cdef bint is_end_token
|
cdef bint is_end_token
|
||||||
for is_end_token in bits:
|
for is_end_token in bits:
|
||||||
if is_end_token:
|
if is_end_token:
|
||||||
slice_unicode(&span, string, start, i+1)
|
span = string[start:i+1]
|
||||||
lex = self.vocab.get(tokens.mem, &span)
|
lex = self.vocab.get(tokens.mem, span)
|
||||||
is_spacy = (i+1) < length and string[i+1] == u' '
|
is_spacy = (i+1) < length and string[i+1] == u' '
|
||||||
tokens.push_back(lex, is_spacy)
|
tokens.push_back(lex, is_spacy)
|
||||||
start = i + 1 + is_spacy
|
start = i + 1 + is_spacy
|
||||||
|
|
|
@ -78,9 +78,3 @@ cdef struct TokenC:
|
||||||
|
|
||||||
int ent_iob
|
int ent_iob
|
||||||
int ent_type
|
int ent_type
|
||||||
|
|
||||||
|
|
||||||
cdef struct UniStr:
|
|
||||||
Py_UNICODE* chars
|
|
||||||
size_t n
|
|
||||||
hash_t key
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user