mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-27 02:16:32 +03:00
* Shadow StateClass with StateC, to start proxying
This commit is contained in:
parent
2fa228458e
commit
2169bbb7ea
|
@ -5,6 +5,8 @@ from cymem.cymem cimport Pool
|
||||||
from ..structs cimport TokenC, Entity
|
from ..structs cimport TokenC, Entity
|
||||||
|
|
||||||
from ..vocab cimport EMPTY_LEXEME
|
from ..vocab cimport EMPTY_LEXEME
|
||||||
|
from ._state cimport StateC
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
cdef class StateClass:
|
cdef class StateClass:
|
||||||
|
@ -12,6 +14,7 @@ cdef class StateClass:
|
||||||
cdef int* _stack
|
cdef int* _stack
|
||||||
cdef int* _buffer
|
cdef int* _buffer
|
||||||
cdef bint* shifted
|
cdef bint* shifted
|
||||||
|
cdef StateC* c
|
||||||
cdef TokenC* _sent
|
cdef TokenC* _sent
|
||||||
cdef Entity* _ents
|
cdef Entity* _ents
|
||||||
cdef TokenC _empty_token
|
cdef TokenC _empty_token
|
||||||
|
@ -30,43 +33,55 @@ cdef class StateClass:
|
||||||
self._buffer[i] = i
|
self._buffer[i] = i
|
||||||
for i in range(length, length + 5):
|
for i in range(length, length + 5):
|
||||||
self._sent[i].lex = &EMPTY_LEXEME
|
self._sent[i].lex = &EMPTY_LEXEME
|
||||||
|
|
||||||
|
self.c = new StateC(sent, length)
|
||||||
return self
|
return self
|
||||||
|
|
||||||
cdef inline int S(self, int i) nogil:
|
cdef inline int S(self, int i) nogil:
|
||||||
|
self.c.S(i)
|
||||||
if i >= self._s_i:
|
if i >= self._s_i:
|
||||||
return -1
|
return -1
|
||||||
return self._stack[self._s_i - (i+1)]
|
return self._stack[self._s_i - (i+1)]
|
||||||
|
|
||||||
cdef inline int B(self, int i) nogil:
|
cdef inline int B(self, int i) nogil:
|
||||||
|
self.c.B(i)
|
||||||
if (i + self._b_i) >= self.length:
|
if (i + self._b_i) >= self.length:
|
||||||
return -1
|
return -1
|
||||||
return self._buffer[self._b_i + i]
|
return self._buffer[self._b_i + i]
|
||||||
|
|
||||||
cdef inline const TokenC* S_(self, int i) nogil:
|
cdef inline const TokenC* S_(self, int i) nogil:
|
||||||
|
self.c.S_(i)
|
||||||
return self.safe_get(self.S(i))
|
return self.safe_get(self.S(i))
|
||||||
|
|
||||||
cdef inline const TokenC* B_(self, int i) nogil:
|
cdef inline const TokenC* B_(self, int i) nogil:
|
||||||
|
self.c.B_(i)
|
||||||
return self.safe_get(self.B(i))
|
return self.safe_get(self.B(i))
|
||||||
|
|
||||||
cdef inline const TokenC* H_(self, int i) nogil:
|
cdef inline const TokenC* H_(self, int i) nogil:
|
||||||
|
self.c.H_(i)
|
||||||
return self.safe_get(self.H(i))
|
return self.safe_get(self.H(i))
|
||||||
|
|
||||||
cdef inline const TokenC* E_(self, int i) nogil:
|
cdef inline const TokenC* E_(self, int i) nogil:
|
||||||
|
self.c.E_(i)
|
||||||
return self.safe_get(self.E(i))
|
return self.safe_get(self.E(i))
|
||||||
|
|
||||||
cdef inline const TokenC* L_(self, int i, int idx) nogil:
|
cdef inline const TokenC* L_(self, int i, int idx) nogil:
|
||||||
|
self.c.L_(i, idx)
|
||||||
return self.safe_get(self.L(i, idx))
|
return self.safe_get(self.L(i, idx))
|
||||||
|
|
||||||
cdef inline const TokenC* R_(self, int i, int idx) nogil:
|
cdef inline const TokenC* R_(self, int i, int idx) nogil:
|
||||||
|
self.c.R_(i, idx)
|
||||||
return self.safe_get(self.R(i, idx))
|
return self.safe_get(self.R(i, idx))
|
||||||
|
|
||||||
cdef inline const TokenC* safe_get(self, int i) nogil:
|
cdef inline const TokenC* safe_get(self, int i) nogil:
|
||||||
|
self.c.safe_get(i)
|
||||||
if i < 0 or i >= self.length:
|
if i < 0 or i >= self.length:
|
||||||
return &self._empty_token
|
return &self._empty_token
|
||||||
else:
|
else:
|
||||||
return &self._sent[i]
|
return &self._sent[i]
|
||||||
|
|
||||||
cdef inline int H(self, int i) nogil:
|
cdef inline int H(self, int i) nogil:
|
||||||
|
self.c.H(i)
|
||||||
if i < 0 or i >= self.length:
|
if i < 0 or i >= self.length:
|
||||||
return -1
|
return -1
|
||||||
return self._sent[i].head + i
|
return self._sent[i].head + i
|
||||||
|
@ -78,38 +93,48 @@ cdef class StateClass:
|
||||||
cdef int L(self, int i, int idx) nogil
|
cdef int L(self, int i, int idx) nogil
|
||||||
|
|
||||||
cdef inline bint empty(self) nogil:
|
cdef inline bint empty(self) nogil:
|
||||||
|
self.c.empty()
|
||||||
return self._s_i <= 0
|
return self._s_i <= 0
|
||||||
|
|
||||||
cdef inline bint eol(self) nogil:
|
cdef inline bint eol(self) nogil:
|
||||||
|
self.c.eol()
|
||||||
return self.buffer_length() == 0
|
return self.buffer_length() == 0
|
||||||
|
|
||||||
cdef inline bint at_break(self) nogil:
|
cdef inline bint at_break(self) nogil:
|
||||||
|
self.c.at_break()
|
||||||
return self._break != -1
|
return self._break != -1
|
||||||
|
|
||||||
cdef inline bint is_final(self) nogil:
|
cdef inline bint is_final(self) nogil:
|
||||||
|
self.c.is_final()
|
||||||
return self.stack_depth() <= 0 and self._b_i >= self.length
|
return self.stack_depth() <= 0 and self._b_i >= self.length
|
||||||
|
|
||||||
cdef inline bint has_head(self, int i) nogil:
|
cdef inline bint has_head(self, int i) nogil:
|
||||||
|
self.c.has_head(i)
|
||||||
return self.safe_get(i).head != 0
|
return self.safe_get(i).head != 0
|
||||||
|
|
||||||
cdef inline int n_L(self, int i) nogil:
|
cdef inline int n_L(self, int i) nogil:
|
||||||
|
self.c.n_L(i)
|
||||||
return self.safe_get(i).l_kids
|
return self.safe_get(i).l_kids
|
||||||
|
|
||||||
cdef inline int n_R(self, int i) nogil:
|
cdef inline int n_R(self, int i) nogil:
|
||||||
|
self.c.n_R(i)
|
||||||
return self.safe_get(i).r_kids
|
return self.safe_get(i).r_kids
|
||||||
|
|
||||||
cdef inline bint stack_is_connected(self) nogil:
|
cdef inline bint stack_is_connected(self) nogil:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
cdef inline bint entity_is_open(self) nogil:
|
cdef inline bint entity_is_open(self) nogil:
|
||||||
|
self.c.entity_is_open()
|
||||||
if self._e_i < 1:
|
if self._e_i < 1:
|
||||||
return False
|
return False
|
||||||
return self._ents[self._e_i-1].end == -1
|
return self._ents[self._e_i-1].end == -1
|
||||||
|
|
||||||
cdef inline int stack_depth(self) nogil:
|
cdef inline int stack_depth(self) nogil:
|
||||||
|
self.c.stack_depth()
|
||||||
return self._s_i
|
return self._s_i
|
||||||
|
|
||||||
cdef inline int buffer_length(self) nogil:
|
cdef inline int buffer_length(self) nogil:
|
||||||
|
self.c.buffer_length()
|
||||||
if self._break != -1:
|
if self._break != -1:
|
||||||
return self._break - self._b_i
|
return self._break - self._b_i
|
||||||
else:
|
else:
|
||||||
|
|
|
@ -47,6 +47,7 @@ cdef class StateClass:
|
||||||
return {self.B(i) for i in range(self._b_i)}
|
return {self.B(i) for i in range(self._b_i)}
|
||||||
|
|
||||||
cdef int E(self, int i) nogil:
|
cdef int E(self, int i) nogil:
|
||||||
|
self.c.E(i)
|
||||||
if self._e_i <= 0 or self._e_i >= self.length:
|
if self._e_i <= 0 or self._e_i >= self.length:
|
||||||
return 0
|
return 0
|
||||||
if i < 0 or i >= self._e_i:
|
if i < 0 or i >= self._e_i:
|
||||||
|
@ -54,6 +55,7 @@ cdef class StateClass:
|
||||||
return self._ents[self._e_i - (i+1)].start
|
return self._ents[self._e_i - (i+1)].start
|
||||||
|
|
||||||
cdef int L(self, int i, int idx) nogil:
|
cdef int L(self, int i, int idx) nogil:
|
||||||
|
self.c.L(i, idx)
|
||||||
if idx < 1:
|
if idx < 1:
|
||||||
return -1
|
return -1
|
||||||
if i < 0 or i >= self.length:
|
if i < 0 or i >= self.length:
|
||||||
|
@ -80,6 +82,7 @@ cdef class StateClass:
|
||||||
return -1
|
return -1
|
||||||
|
|
||||||
cdef int R(self, int i, int idx) nogil:
|
cdef int R(self, int i, int idx) nogil:
|
||||||
|
self.c.R(i, idx)
|
||||||
if idx < 1:
|
if idx < 1:
|
||||||
return -1
|
return -1
|
||||||
if i < 0 or i >= self.length:
|
if i < 0 or i >= self.length:
|
||||||
|
@ -104,6 +107,7 @@ cdef class StateClass:
|
||||||
return -1
|
return -1
|
||||||
|
|
||||||
cdef void push(self) nogil:
|
cdef void push(self) nogil:
|
||||||
|
self.c.push()
|
||||||
if self.B(0) != -1:
|
if self.B(0) != -1:
|
||||||
self._stack[self._s_i] = self.B(0)
|
self._stack[self._s_i] = self.B(0)
|
||||||
self._s_i += 1
|
self._s_i += 1
|
||||||
|
@ -112,16 +116,19 @@ cdef class StateClass:
|
||||||
self._break = -1
|
self._break = -1
|
||||||
|
|
||||||
cdef void pop(self) nogil:
|
cdef void pop(self) nogil:
|
||||||
|
self.c.pop()
|
||||||
if self._s_i >= 1:
|
if self._s_i >= 1:
|
||||||
self._s_i -= 1
|
self._s_i -= 1
|
||||||
|
|
||||||
cdef void unshift(self) nogil:
|
cdef void unshift(self) nogil:
|
||||||
|
self.c.unshift()
|
||||||
self._b_i -= 1
|
self._b_i -= 1
|
||||||
self._buffer[self._b_i] = self.S(0)
|
self._buffer[self._b_i] = self.S(0)
|
||||||
self._s_i -= 1
|
self._s_i -= 1
|
||||||
self.shifted[self.B(0)] = True
|
self.shifted[self.B(0)] = True
|
||||||
|
|
||||||
cdef void fast_forward(self) nogil:
|
cdef void fast_forward(self) nogil:
|
||||||
|
self.c.fast_forward()
|
||||||
while self.buffer_length() == 0 \
|
while self.buffer_length() == 0 \
|
||||||
or self.stack_depth() == 0 \
|
or self.stack_depth() == 0 \
|
||||||
or Lexeme.c_check_flag(self.S_(0).lex, IS_SPACE):
|
or Lexeme.c_check_flag(self.S_(0).lex, IS_SPACE):
|
||||||
|
@ -144,6 +151,7 @@ cdef class StateClass:
|
||||||
break
|
break
|
||||||
|
|
||||||
cdef void add_arc(self, int head, int child, int label) nogil:
|
cdef void add_arc(self, int head, int child, int label) nogil:
|
||||||
|
self.c.add_arc(head, child, label)
|
||||||
if self.has_head(child):
|
if self.has_head(child):
|
||||||
self.del_arc(self.H(child), child)
|
self.del_arc(self.H(child), child)
|
||||||
|
|
||||||
|
@ -166,6 +174,7 @@ cdef class StateClass:
|
||||||
self._sent[head].l_edge = self._sent[child].l_edge
|
self._sent[head].l_edge = self._sent[child].l_edge
|
||||||
|
|
||||||
cdef void del_arc(self, int h_i, int c_i) nogil:
|
cdef void del_arc(self, int h_i, int c_i) nogil:
|
||||||
|
self.c.del_arc(h_i, c_i)
|
||||||
cdef int dist = h_i - c_i
|
cdef int dist = h_i - c_i
|
||||||
cdef TokenC* h = &self._sent[h_i]
|
cdef TokenC* h = &self._sent[h_i]
|
||||||
if c_i > h_i:
|
if c_i > h_i:
|
||||||
|
@ -176,28 +185,33 @@ cdef class StateClass:
|
||||||
h.l_kids -= 1
|
h.l_kids -= 1
|
||||||
|
|
||||||
cdef void open_ent(self, int label) nogil:
|
cdef void open_ent(self, int label) nogil:
|
||||||
|
self.c.open_ent(label)
|
||||||
self._ents[self._e_i].start = self.B(0)
|
self._ents[self._e_i].start = self.B(0)
|
||||||
self._ents[self._e_i].label = label
|
self._ents[self._e_i].label = label
|
||||||
self._ents[self._e_i].end = -1
|
self._ents[self._e_i].end = -1
|
||||||
self._e_i += 1
|
self._e_i += 1
|
||||||
|
|
||||||
cdef void close_ent(self) nogil:
|
cdef void close_ent(self) nogil:
|
||||||
|
self.c.close_ent()
|
||||||
# Note that we don't decrement _e_i here! We want to maintain all
|
# Note that we don't decrement _e_i here! We want to maintain all
|
||||||
# entities, not over-write them...
|
# entities, not over-write them...
|
||||||
self._ents[self._e_i-1].end = self.B(0)+1
|
self._ents[self._e_i-1].end = self.B(0)+1
|
||||||
self._sent[self.B(0)].ent_iob = 1
|
self._sent[self.B(0)].ent_iob = 1
|
||||||
|
|
||||||
cdef void set_ent_tag(self, int i, int ent_iob, int ent_type) nogil:
|
cdef void set_ent_tag(self, int i, int ent_iob, int ent_type) nogil:
|
||||||
|
self.c.set_ent_tag(i, ent_iob, ent_type)
|
||||||
if 0 <= i < self.length:
|
if 0 <= i < self.length:
|
||||||
self._sent[i].ent_iob = ent_iob
|
self._sent[i].ent_iob = ent_iob
|
||||||
self._sent[i].ent_type = ent_type
|
self._sent[i].ent_type = ent_type
|
||||||
|
|
||||||
cdef void set_break(self, int _) nogil:
|
cdef void set_break(self, int _) nogil:
|
||||||
|
self.c.set_break(_)
|
||||||
if 0 <= self.B(0) < self.length:
|
if 0 <= self.B(0) < self.length:
|
||||||
self._sent[self.B(0)].sent_start = True
|
self._sent[self.B(0)].sent_start = True
|
||||||
self._break = self._b_i
|
self._break = self._b_i
|
||||||
|
|
||||||
cdef void clone(self, StateClass src) nogil:
|
cdef void clone(self, StateClass src) nogil:
|
||||||
|
self.c.clone(src.c)
|
||||||
memcpy(self._sent, src._sent, self.length * sizeof(TokenC))
|
memcpy(self._sent, src._sent, self.length * sizeof(TokenC))
|
||||||
memcpy(self._stack, src._stack, self.length * sizeof(int))
|
memcpy(self._stack, src._stack, self.length * sizeof(int))
|
||||||
memcpy(self._buffer, src._buffer, self.length * sizeof(int))
|
memcpy(self._buffer, src._buffer, self.length * sizeof(int))
|
||||||
|
|
Loading…
Reference in New Issue
Block a user