* Have Tokens return proper numpy arrays, not Cython views.

This commit is contained in:
Matthew Honnibal 2015-06-23 00:07:06 +02:00
parent 69507bc729
commit 5e94b5d581
2 changed files with 11 additions and 7 deletions

View File

@@ -1,7 +1,7 @@
from libc.stdint cimport uint32_t from libc.stdint cimport uint32_t
from numpy cimport ndarray from numpy cimport ndarray
cimport numpy cimport numpy as np
from cymem.cymem cimport Pool from cymem.cymem cimport Pool
from thinc.typedefs cimport atom_t from thinc.typedefs cimport atom_t
@@ -47,7 +47,7 @@ cdef class Tokens:
cdef int push_back(self, int i, LexemeOrToken lex_or_tok) except -1 cdef int push_back(self, int i, LexemeOrToken lex_or_tok) except -1
cpdef long[:,:] to_array(self, object features) cpdef np.ndarray to_array(self, object features)
cdef int set_parse(self, const TokenC* parsed) except -1 cdef int set_parse(self, const TokenC* parsed) except -1

View File

@@ -18,7 +18,9 @@ from .structs cimport UniStr
from unidecode import unidecode from unidecode import unidecode
cimport numpy cimport numpy as np
np.import_array()
import numpy import numpy
cimport cython cimport cython
@@ -207,7 +209,7 @@ cdef class Tokens:
return idx + t.lex.length return idx + t.lex.length
@cython.boundscheck(False) @cython.boundscheck(False)
cpdef long[:,:] to_array(self, object py_attr_ids): cpdef np.ndarray to_array(self, object py_attr_ids):
"""Given a list of M attribute IDs, export the tokens to a numpy ndarray """Given a list of M attribute IDs, export the tokens to a numpy ndarray
of shape N*M, where N is the length of the sentence. of shape N*M, where N is the length of the sentence.
@@ -221,10 +223,10 @@ cdef class Tokens:
""" """
cdef int i, j cdef int i, j
cdef attr_id_t feature cdef attr_id_t feature
cdef numpy.ndarray[long, ndim=2] output cdef np.ndarray[long, ndim=2] output
# Make an array from the attributes --- otherwise our inner loop is Python # Make an array from the attributes --- otherwise our inner loop is Python
# dict iteration. # dict iteration.
cdef numpy.ndarray[long, ndim=1] attr_ids = numpy.asarray(py_attr_ids) cdef np.ndarray[long, ndim=1] attr_ids = numpy.asarray(py_attr_ids)
output = numpy.ndarray(shape=(self.length, len(attr_ids)), dtype=numpy.int) output = numpy.ndarray(shape=(self.length, len(attr_ids)), dtype=numpy.int)
for i in range(self.length): for i in range(self.length):
for j, feature in enumerate(attr_ids): for j, feature in enumerate(attr_ids):
@@ -464,7 +466,9 @@ cdef class Token:
property repvec: property repvec:
def __get__(self): def __get__(self):
return numpy.asarray(<float[:self.vocab.repvec_length,]> self.c.lex.repvec) cdef int length = self.vocab.repvec_length
repvec_view = <float[:length,]>self.c.lex.repvec
return numpy.asarray(repvec_view)
property n_lefts: property n_lefts:
def __get__(self): def __get__(self):