2017-04-15 14:05:15 +03:00
|
|
|
# coding: utf-8
|
2017-05-06 15:22:20 +03:00
|
|
|
# cython: infer_types=True
|
2017-04-15 14:05:15 +03:00
|
|
|
from __future__ import unicode_literals
|
|
|
|
|
2015-06-09 02:39:54 +03:00
|
|
|
from libc.string cimport memcpy, memset
|
2017-05-06 15:22:20 +03:00
|
|
|
from libc.stdint cimport uint32_t, uint64_t
|
2017-04-15 14:05:15 +03:00
|
|
|
|
2015-06-09 22:20:14 +03:00
|
|
|
from ..vocab cimport EMPTY_LEXEME
|
2015-06-10 05:20:23 +03:00
|
|
|
from ..structs cimport Entity
|
2016-01-19 04:54:15 +03:00
|
|
|
from ..lexeme cimport Lexeme
|
|
|
|
from ..symbols cimport punct
|
|
|
|
from ..attrs cimport IS_SPACE
|
2017-05-06 15:22:20 +03:00
|
|
|
from ..attrs cimport attr_id_t
|
|
|
|
from ..tokens.token cimport Token
|
2015-06-09 02:39:54 +03:00
|
|
|
|
|
|
|
|
|
|
|
cdef class StateClass:
    """Python-facing wrapper around the parser state stored in ``self.c``.

    The methods below expose the stack and buffer contents and fill
    caller-supplied arrays with context-token indices, token attributes
    and token vectors.

    NOTE(review): the attributes ``c``, ``mem`` and ``doc`` and the
    accessors ``S``, ``B``, ``L``, ``R`` and ``S_`` are used here but are
    declared outside this section (presumably in the matching ``.pxd``).
    """

    def __init__(self, int length):
        """Create the memory pool owned by this state.

        length: Accepted for interface compatibility; it is not used in
            this method.

        NOTE(review): ``self.c`` is not assigned here, so it is presumably
        set up by a separate initialiser -- confirm.
        """
        cdef Pool mem = Pool()
        self.mem = mem

    def __dealloc__(self):
        """Release the state object referenced by ``self.c``."""
        del self.c

    @property
    def stack(self):
        """The set of ``self.S(i)`` for every ``i`` below ``self.c._s_i``."""
        return {self.S(i) for i in range(self.c._s_i)}

    @property
    def queue(self):
        """The set of ``self.B(i)`` for every ``i`` below the buffer length."""
        return {self.B(i) for i in range(self.c.buffer_length())}

    @property
    def token_vector_lenth(self):
        """Size of the second axis of ``self.doc.tensor``.

        The name is misspelled but kept for existing callers; new code
        should use ``token_vector_length``.
        """
        return self.doc.tensor.shape[1]

    @property
    def token_vector_length(self):
        """Correctly spelled alias of ``token_vector_lenth``."""
        return self.token_vector_lenth

    def py_is_final(self):
        """Return the result of ``self.c.is_final()``."""
        return self.c.is_final()

    def print_state(self, words):
        """Return a one-line summary of the top of the stack and the buffer.

        words: Iterable of token strings, looked up by the values that
            ``self.S`` and ``self.B`` return.
        RETURNS: The string ``"third second top | n0 n1"``. Each of the
            three stack entries is suffixed with ``_<head>``, where
            ``<head>`` comes from ``self.S_(i).head``.
        """
        # The extra '_' makes an index of -1 resolve to a placeholder.
        # NOTE(review): presumably S()/B() return -1 for an empty slot --
        # confirm against their definitions.
        words = list(words) + ['_']
        top = words[self.S(0)] + '_%d' % self.S_(0).head
        second = words[self.S(1)] + '_%d' % self.S_(1).head
        third = words[self.S(2)] + '_%d' % self.S_(2).head
        n0 = words[self.B(0)]
        n1 = words[self.B(1)]
        return ' '.join((third, second, top, '|', n0, n1))

    @classmethod
    def nr_context_tokens(cls, int nF, int nB, int nS, int nL, int nR):
        """Return how many slots ``set_context_tokens`` fills.

        The arguments are accepted for interface compatibility and are
        currently ignored; the count is fixed.

        RETURNS: 12. This previously reported 11 while
            ``set_context_tokens`` computed twelve features, two of which
            were written to the same slot. Keep this value in sync with
            ``set_context_tokens``.
        """
        return 12

    def set_context_tokens(self, int[:] output, nF=1, nB=0, nS=2,
                           nL=2, nR=2):
        """Write the twelve context-token indices into ``output``.

        Slot layout:
            0-1   B(0), B(1)
            2-3   S(0), S(1)
            4-5   L(S(0), 1), L(S(0), 2)
            6-7   R(S(0), 1), R(S(0), 2)
            8-9   L(S(1), 1), L(S(1), 2)
            10-11 R(S(1), 1), R(S(1), 2)

        output: Integer buffer with room for at least
            ``nr_context_tokens(...)`` (12) entries.
        nF, nB, nS, nL, nR: Accepted for interface compatibility and
            currently ignored.
        RAISES: ValueError if ``output`` has fewer than 12 entries.
        """
        cdef int s0 = self.S(0)
        cdef int s1 = self.S(1)
        # Guard explicitly: writing past the end of a too-short buffer
        # must fail loudly rather than depend on bounds-check settings.
        if output.shape[0] < 12:
            raise ValueError(
                "output must have room for 12 context tokens, got %d"
                % output.shape[0])
        output[0] = self.B(0)
        output[1] = self.B(1)
        output[2] = s0
        output[3] = s1
        output[4] = self.L(s0, 1)
        output[5] = self.L(s0, 2)
        output[6] = self.R(s0, 1)
        output[7] = self.R(s0, 2)
        # Previously the next value was also stored in slot 7, which
        # discarded R(S(0), 2). Every feature now has its own slot.
        output[8] = self.L(s1, 1)
        output[9] = self.L(s1, 2)
        output[10] = self.R(s1, 1)
        output[11] = self.R(s1, 2)

    def set_attributes(self, uint64_t[:, :] vals, int[:] tokens, int[:] names):
        """Fill ``vals[i, j]`` with attribute ``names[j]`` of token ``tokens[i]``.

        vals: Output buffer, indexed as ``[token position, attribute position]``.
        tokens: Token indices into ``self.c._sent``. A negative index
            marks a missing token, and its whole row is set to 0.
        names: Attribute IDs, each cast to ``attr_id_t`` before lookup.
        """
        cdef int i, j, tok_i
        for i in range(tokens.shape[0]):
            tok_i = tokens[i]
            if tok_i >= 0:
                token = &self.c._sent[tok_i]
                for j in range(names.shape[0]):
                    vals[i, j] = Token.get_struct_attr(token, <attr_id_t>names[j])
            else:
                vals[i] = 0

    def set_token_vectors(self, tokvecs,
                          all_tokvecs, int[:] indices):
        """Copy the rows of ``all_tokvecs`` selected by ``indices`` into ``tokvecs``.

        tokvecs: Output container; row ``i`` receives
            ``all_tokvecs[indices[i]]``.
        all_tokvecs: Source container, indexed by token index.
        indices: Token indices. A negative index marks a missing token,
            and ``tokvecs[i]`` is set to 0 instead.
        """
        for i in range(indices.shape[0]):
            if indices[i] >= 0:
                tokvecs[i] = all_tokvecs[indices[i]]
            else:
                tokvecs[i] = 0
|