mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-27 09:44:36 +03:00
* Hack Tokens to work without tagger.pyx
This commit is contained in:
parent
b463a7eb86
commit
e170faf5b0
|
@ -4,9 +4,8 @@ cimport numpy as np
|
||||||
from cymem.cymem cimport Pool
|
from cymem.cymem cimport Pool
|
||||||
|
|
||||||
from .lexeme cimport Lexeme
|
from .lexeme cimport Lexeme
|
||||||
from .typedefs cimport flag_t
|
from .typedefs cimport flags_t
|
||||||
from .utf8string cimport StringStore
|
from .utf8string cimport StringStore
|
||||||
from .tagger cimport TagType
|
|
||||||
|
|
||||||
from thinc.typedefs cimport atom_t
|
from thinc.typedefs cimport atom_t
|
||||||
|
|
||||||
|
@ -29,7 +28,7 @@ cdef class Tokens:
|
||||||
|
|
||||||
cdef int extend(self, int i, Lexeme** lexemes, int n) except -1
|
cdef int extend(self, int i, Lexeme** lexemes, int n) except -1
|
||||||
cdef int push_back(self, int i, Lexeme* lexeme) except -1
|
cdef int push_back(self, int i, Lexeme* lexeme) except -1
|
||||||
cpdef int set_tag(self, int i, TagType tag_type, int tag) except -1
|
cpdef int set_tag(self, int i, int tag_type, int tag) except -1
|
||||||
|
|
||||||
cpdef np.ndarray[atom_t, ndim=2] get_array(self, list features)
|
cpdef np.ndarray[atom_t, ndim=2] get_array(self, list features)
|
||||||
|
|
||||||
|
@ -56,4 +55,4 @@ cdef class Token:
|
||||||
|
|
||||||
cdef public float prob
|
cdef public float prob
|
||||||
|
|
||||||
cdef public flag_t flags
|
cdef public flags_t flags
|
||||||
|
|
|
@ -1,7 +1,9 @@
|
||||||
# cython: profile=True
|
# cython: profile=True
|
||||||
from .lexeme cimport *
|
from .lexeme cimport *
|
||||||
cimport cython
|
cimport cython
|
||||||
from .tagger cimport POS, ENTITY
|
|
||||||
|
POS = 0
|
||||||
|
ENTITY = 0
|
||||||
|
|
||||||
DEF PADDING = 5
|
DEF PADDING = 5
|
||||||
|
|
||||||
|
@ -96,7 +98,7 @@ cdef class Tokens:
|
||||||
idx = self.push_back(idx, lexemes[i])
|
idx = self.push_back(idx, lexemes[i])
|
||||||
return idx
|
return idx
|
||||||
|
|
||||||
cpdef int set_tag(self, int i, TagType tag_type, int tag) except -1:
|
cpdef int set_tag(self, int i, int tag_type, int tag) except -1:
|
||||||
if tag_type == POS:
|
if tag_type == POS:
|
||||||
self.pos[i] = tag
|
self.pos[i] = tag
|
||||||
elif tag_type == ENTITY:
|
elif tag_type == ENTITY:
|
||||||
|
@ -108,8 +110,7 @@ cdef class Tokens:
|
||||||
output = np.ndarray(shape=(self.length, len(features)), dtype=int)
|
output = np.ndarray(shape=(self.length, len(features)), dtype=int)
|
||||||
for i in range(self.length):
|
for i in range(self.length):
|
||||||
for j, feature in enumerate(features):
|
for j, feature in enumerate(features):
|
||||||
output[i, j] = self.lex[i].sic
|
output[i, j] = get_attr(self.lex[i], feature)
|
||||||
#output[i, j] = lexeme_get_feature(self.lex[i], feature)
|
|
||||||
return output
|
return output
|
||||||
|
|
||||||
def _realloc(self, new_size):
|
def _realloc(self, new_size):
|
||||||
|
@ -140,8 +141,8 @@ cdef class Token:
|
||||||
|
|
||||||
self.cluster = lex['cluster']
|
self.cluster = lex['cluster']
|
||||||
self.length = lex['length']
|
self.length = lex['length']
|
||||||
self.postype = lex['postype']
|
self.postype = lex['pos_type']
|
||||||
self.sensetype = lex['supersense']
|
self.sensetype = lex['sense_type']
|
||||||
self.sic = lex['sic']
|
self.sic = lex['sic']
|
||||||
self.norm = lex['norm']
|
self.norm = lex['norm']
|
||||||
self.shape = lex['shape']
|
self.shape = lex['shape']
|
||||||
|
|
Loading…
Reference in New Issue
Block a user