* Hack Tokens to work without tagger.pyx

This commit is contained in:
Matthew Honnibal 2014-12-03 11:05:15 +11:00
parent b463a7eb86
commit e170faf5b0
2 changed files with 10 additions and 10 deletions

View File

@ -4,9 +4,8 @@ cimport numpy as np
from cymem.cymem cimport Pool from cymem.cymem cimport Pool
from .lexeme cimport Lexeme from .lexeme cimport Lexeme
from .typedefs cimport flag_t from .typedefs cimport flags_t
from .utf8string cimport StringStore from .utf8string cimport StringStore
from .tagger cimport TagType
from thinc.typedefs cimport atom_t from thinc.typedefs cimport atom_t
@ -29,7 +28,7 @@ cdef class Tokens:
cdef int extend(self, int i, Lexeme** lexemes, int n) except -1 cdef int extend(self, int i, Lexeme** lexemes, int n) except -1
cdef int push_back(self, int i, Lexeme* lexeme) except -1 cdef int push_back(self, int i, Lexeme* lexeme) except -1
cpdef int set_tag(self, int i, TagType tag_type, int tag) except -1 cpdef int set_tag(self, int i, int tag_type, int tag) except -1
cpdef np.ndarray[atom_t, ndim=2] get_array(self, list features) cpdef np.ndarray[atom_t, ndim=2] get_array(self, list features)
@ -56,4 +55,4 @@ cdef class Token:
cdef public float prob cdef public float prob
cdef public flag_t flags cdef public flags_t flags

View File

@ -1,7 +1,9 @@
# cython: profile=True # cython: profile=True
from .lexeme cimport * from .lexeme cimport *
cimport cython cimport cython
from .tagger cimport POS, ENTITY
POS = 0
ENTITY = 0
DEF PADDING = 5 DEF PADDING = 5
@ -96,7 +98,7 @@ cdef class Tokens:
idx = self.push_back(idx, lexemes[i]) idx = self.push_back(idx, lexemes[i])
return idx return idx
cpdef int set_tag(self, int i, TagType tag_type, int tag) except -1: cpdef int set_tag(self, int i, int tag_type, int tag) except -1:
if tag_type == POS: if tag_type == POS:
self.pos[i] = tag self.pos[i] = tag
elif tag_type == ENTITY: elif tag_type == ENTITY:
@ -108,8 +110,7 @@ cdef class Tokens:
output = np.ndarray(shape=(self.length, len(features)), dtype=int) output = np.ndarray(shape=(self.length, len(features)), dtype=int)
for i in range(self.length): for i in range(self.length):
for j, feature in enumerate(features): for j, feature in enumerate(features):
output[i, j] = self.lex[i].sic output[i, j] = get_attr(self.lex[i], feature)
#output[i, j] = lexeme_get_feature(self.lex[i], feature)
return output return output
def _realloc(self, new_size): def _realloc(self, new_size):
@ -140,8 +141,8 @@ cdef class Token:
self.cluster = lex['cluster'] self.cluster = lex['cluster']
self.length = lex['length'] self.length = lex['length']
self.postype = lex['postype'] self.postype = lex['pos_type']
self.sensetype = lex['supersense'] self.sensetype = lex['sense_type']
self.sic = lex['sic'] self.sic = lex['sic']
self.norm = lex['norm'] self.norm = lex['norm']
self.shape = lex['shape'] self.shape = lex['shape']