mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-13 18:56:36 +03:00
* Have Tokens return proper NumPy ndarrays, not Cython typed memoryviews.
This commit is contained in:
parent
69507bc729
commit
5e94b5d581
|
@ -1,7 +1,7 @@
|
||||||
from libc.stdint cimport uint32_t
|
from libc.stdint cimport uint32_t
|
||||||
|
|
||||||
from numpy cimport ndarray
|
from numpy cimport ndarray
|
||||||
cimport numpy
|
cimport numpy as np
|
||||||
|
|
||||||
from cymem.cymem cimport Pool
|
from cymem.cymem cimport Pool
|
||||||
from thinc.typedefs cimport atom_t
|
from thinc.typedefs cimport atom_t
|
||||||
|
@ -47,7 +47,7 @@ cdef class Tokens:
|
||||||
|
|
||||||
cdef int push_back(self, int i, LexemeOrToken lex_or_tok) except -1
|
cdef int push_back(self, int i, LexemeOrToken lex_or_tok) except -1
|
||||||
|
|
||||||
cpdef long[:,:] to_array(self, object features)
|
cpdef np.ndarray to_array(self, object features)
|
||||||
|
|
||||||
cdef int set_parse(self, const TokenC* parsed) except -1
|
cdef int set_parse(self, const TokenC* parsed) except -1
|
||||||
|
|
||||||
|
|
|
@ -18,7 +18,9 @@ from .structs cimport UniStr
|
||||||
|
|
||||||
from unidecode import unidecode
|
from unidecode import unidecode
|
||||||
|
|
||||||
cimport numpy
|
cimport numpy as np
|
||||||
|
np.import_array()
|
||||||
|
|
||||||
import numpy
|
import numpy
|
||||||
|
|
||||||
cimport cython
|
cimport cython
|
||||||
|
@ -207,7 +209,7 @@ cdef class Tokens:
|
||||||
return idx + t.lex.length
|
return idx + t.lex.length
|
||||||
|
|
||||||
@cython.boundscheck(False)
|
@cython.boundscheck(False)
|
||||||
cpdef long[:,:] to_array(self, object py_attr_ids):
|
cpdef np.ndarray to_array(self, object py_attr_ids):
|
||||||
"""Given a list of M attribute IDs, export the tokens to a numpy ndarray
|
"""Given a list of M attribute IDs, export the tokens to a numpy ndarray
|
||||||
of shape N*M, where N is the length of the sentence.
|
of shape N*M, where N is the length of the sentence.
|
||||||
|
|
||||||
|
@ -221,10 +223,10 @@ cdef class Tokens:
|
||||||
"""
|
"""
|
||||||
cdef int i, j
|
cdef int i, j
|
||||||
cdef attr_id_t feature
|
cdef attr_id_t feature
|
||||||
cdef numpy.ndarray[long, ndim=2] output
|
cdef np.ndarray[long, ndim=2] output
|
||||||
# Make an array from the attributes --- otherwise our inner loop is Python
|
# Make an array from the attributes --- otherwise our inner loop is Python
|
||||||
# dict iteration.
|
# dict iteration.
|
||||||
cdef numpy.ndarray[long, ndim=1] attr_ids = numpy.asarray(py_attr_ids)
|
cdef np.ndarray[long, ndim=1] attr_ids = numpy.asarray(py_attr_ids)
|
||||||
output = numpy.ndarray(shape=(self.length, len(attr_ids)), dtype=numpy.int)
|
output = numpy.ndarray(shape=(self.length, len(attr_ids)), dtype=numpy.int)
|
||||||
for i in range(self.length):
|
for i in range(self.length):
|
||||||
for j, feature in enumerate(attr_ids):
|
for j, feature in enumerate(attr_ids):
|
||||||
|
@ -464,7 +466,9 @@ cdef class Token:
|
||||||
|
|
||||||
property repvec:
|
property repvec:
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
return numpy.asarray(<float[:self.vocab.repvec_length,]> self.c.lex.repvec)
|
cdef int length = self.vocab.repvec_length
|
||||||
|
repvec_view = <float[:length,]>self.c.lex.repvec
|
||||||
|
return numpy.asarray(repvec_view)
|
||||||
|
|
||||||
property n_lefts:
|
property n_lefts:
|
||||||
def __get__(self):
|
def __get__(self):
|
||||||
|
|
Loading…
Reference in New Issue
Block a user