mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 17:24:41 +03:00
* Now fully proxied
This commit is contained in:
parent
7a0e3bb9c1
commit
daaad66448
|
@ -116,7 +116,7 @@ cdef bint _is_gold_root(const GoldParseC* gold, int word) nogil:
|
|||
cdef class Shift:
|
||||
@staticmethod
|
||||
cdef bint is_valid(StateClass st, int label) nogil:
|
||||
return st.buffer_length() >= 2 and not st.shifted[st.B(0)] and not st.B_(0).sent_start
|
||||
return st.buffer_length() >= 2 and not st.c.shifted[st.B(0)] and not st.B_(0).sent_start
|
||||
|
||||
@staticmethod
|
||||
cdef int transition(StateClass st, int label) nogil:
|
||||
|
@ -214,7 +214,7 @@ cdef class RightArc:
|
|||
cdef inline weight_t move_cost(StateClass s, const GoldParseC* gold) nogil:
|
||||
if arc_is_gold(gold, s.S(0), s.B(0)):
|
||||
return 0
|
||||
elif s.shifted[s.B(0)]:
|
||||
elif s.c.shifted[s.B(0)]:
|
||||
return push_cost(s, gold, s.B(0))
|
||||
else:
|
||||
return push_cost(s, gold, s.B(0)) + arc_cost(s, gold, s.S(0), s.B(0))
|
||||
|
@ -378,10 +378,7 @@ cdef class ArcEager(TransitionSystem):
|
|||
|
||||
cdef int initialize_state(self, StateClass st) except -1:
|
||||
# Ensure sent_start is set to 0 throughout
|
||||
for i in range(st.length):
|
||||
st._sent[i].sent_start = False
|
||||
st._sent[i].l_edge = i
|
||||
st._sent[i].r_edge = i
|
||||
for i in range(st.c.length):
|
||||
st.c._sent[i].sent_start = False
|
||||
st.c._sent[i].l_edge = i
|
||||
st.c._sent[i].r_edge = i
|
||||
|
@ -389,14 +386,12 @@ cdef class ArcEager(TransitionSystem):
|
|||
|
||||
cdef int finalize_state(self, StateClass st) nogil:
|
||||
cdef int i
|
||||
for i in range(st.length):
|
||||
if st._sent[i].head == 0 and st._sent[i].dep == 0:
|
||||
st._sent[i].dep = self.root_label
|
||||
for i in range(st.c.length):
|
||||
if st.c._sent[i].head == 0 and st.c._sent[i].dep == 0:
|
||||
st.c._sent[i].dep = self.root_label
|
||||
# If we're not using the Break transition, we segment via root-labelled
|
||||
# arcs between the root words.
|
||||
elif USE_ROOT_ARC_SEGMENT and st._sent[i].dep == self.root_label:
|
||||
st._sent[i].head = 0
|
||||
elif USE_ROOT_ARC_SEGMENT and st.c._sent[i].dep == self.root_label:
|
||||
st.c._sent[i].head = 0
|
||||
|
||||
cdef int set_valid(self, int* output, StateClass stcls) nogil:
|
||||
|
|
|
@ -238,7 +238,7 @@ cdef class In:
|
|||
@staticmethod
|
||||
cdef weight_t cost(StateClass s, const GoldParseC* gold, int label) nogil:
|
||||
move = IN
|
||||
cdef int next_act = gold.ner[s.B(1)].move if s.B(0) < s.length else OUT
|
||||
cdef int next_act = gold.ner[s.B(1)].move if s.B(0) < s.c.length else OUT
|
||||
cdef int g_act = gold.ner[s.B(0)].move
|
||||
cdef int g_tag = gold.ner[s.B(0)].label
|
||||
cdef bint is_sunk = _entity_is_sunk(s, gold.ner)
|
||||
|
|
|
@ -130,7 +130,7 @@ cdef class Parser:
|
|||
for i in range(eg.c.nr_class):
|
||||
eg.c.is_valid[i] = 1
|
||||
self.moves.finalize_state(stcls)
|
||||
tokens.set_parse(stcls._sent)
|
||||
tokens.set_parse(stcls.c._sent)
|
||||
|
||||
def train(self, Doc tokens, GoldParse gold):
|
||||
self.moves.preprocess_gold(gold)
|
||||
|
|
|
@ -11,153 +11,115 @@ from ._state cimport StateC
|
|||
|
||||
cdef class StateClass:
|
||||
cdef Pool mem
|
||||
cdef int* _stack
|
||||
cdef int* _buffer
|
||||
cdef bint* shifted
|
||||
cdef StateC* c
|
||||
cdef TokenC* _sent
|
||||
cdef Entity* _ents
|
||||
cdef TokenC _empty_token
|
||||
cdef int length
|
||||
cdef int _s_i
|
||||
cdef int _b_i
|
||||
cdef int _e_i
|
||||
cdef int _break
|
||||
|
||||
@staticmethod
|
||||
cdef inline StateClass init(const TokenC* sent, int length):
|
||||
cdef StateClass self = StateClass(length)
|
||||
cdef int i
|
||||
for i in range(length):
|
||||
self._sent[i] = sent[i]
|
||||
self._buffer[i] = i
|
||||
for i in range(length, length + 5):
|
||||
self._sent[i].lex = &EMPTY_LEXEME
|
||||
|
||||
self.c = new StateC(sent, length)
|
||||
return self
|
||||
|
||||
cdef inline int S(self, int i) nogil:
|
||||
self.c.S(i)
|
||||
if i >= self._s_i:
|
||||
return -1
|
||||
return self._stack[self._s_i - (i+1)]
|
||||
return self.c.S(i)
|
||||
|
||||
cdef inline int B(self, int i) nogil:
|
||||
self.c.B(i)
|
||||
if (i + self._b_i) >= self.length:
|
||||
return -1
|
||||
return self._buffer[self._b_i + i]
|
||||
return self.c.B(i)
|
||||
|
||||
cdef inline const TokenC* S_(self, int i) nogil:
|
||||
self.c.S_(i)
|
||||
return self.safe_get(self.S(i))
|
||||
return self.c.S_(i)
|
||||
|
||||
cdef inline const TokenC* B_(self, int i) nogil:
|
||||
self.c.B_(i)
|
||||
return self.safe_get(self.B(i))
|
||||
return self.c.B_(i)
|
||||
|
||||
cdef inline const TokenC* H_(self, int i) nogil:
|
||||
self.c.H_(i)
|
||||
return self.safe_get(self.H(i))
|
||||
return self.c.H_(i)
|
||||
|
||||
cdef inline const TokenC* E_(self, int i) nogil:
|
||||
self.c.E_(i)
|
||||
return self.safe_get(self.E(i))
|
||||
return self.c.E_(i)
|
||||
|
||||
cdef inline const TokenC* L_(self, int i, int idx) nogil:
|
||||
self.c.L_(i, idx)
|
||||
return self.safe_get(self.L(i, idx))
|
||||
return self.c.L_(i, idx)
|
||||
|
||||
cdef inline const TokenC* R_(self, int i, int idx) nogil:
|
||||
self.c.R_(i, idx)
|
||||
return self.safe_get(self.R(i, idx))
|
||||
return self.c.R_(i, idx)
|
||||
|
||||
cdef inline const TokenC* safe_get(self, int i) nogil:
|
||||
self.c.safe_get(i)
|
||||
if i < 0 or i >= self.length:
|
||||
return &self._empty_token
|
||||
else:
|
||||
return &self._sent[i]
|
||||
return self.c.safe_get(i)
|
||||
|
||||
cdef inline int H(self, int i) nogil:
|
||||
return self.c.H(i)
|
||||
if i < 0 or i >= self.length:
|
||||
return -1
|
||||
return self._sent[i].head + i
|
||||
|
||||
cdef int E(self, int i) nogil
|
||||
|
||||
cdef int R(self, int i, int idx) nogil
|
||||
|
||||
cdef int L(self, int i, int idx) nogil
|
||||
cdef inline int E(self, int i) nogil:
|
||||
return self.c.E(i)
|
||||
|
||||
cdef inline int L(self, int i, int idx) nogil:
|
||||
return self.c.L(i, idx)
|
||||
|
||||
cdef inline int R(self, int i, int idx) nogil:
|
||||
return self.c.R(i, idx)
|
||||
|
||||
cdef inline bint empty(self) nogil:
|
||||
self.c.empty()
|
||||
return self._s_i <= 0
|
||||
return self.c.empty()
|
||||
|
||||
cdef inline bint eol(self) nogil:
|
||||
self.c.eol()
|
||||
return self.buffer_length() == 0
|
||||
return self.c.eol()
|
||||
|
||||
cdef inline bint at_break(self) nogil:
|
||||
self.c.at_break()
|
||||
return self._break != -1
|
||||
return self.c.at_break()
|
||||
|
||||
cdef inline bint is_final(self) nogil:
|
||||
self.c.is_final()
|
||||
return self.stack_depth() <= 0 and self._b_i >= self.length
|
||||
return self.c.is_final()
|
||||
|
||||
cdef inline bint has_head(self, int i) nogil:
|
||||
#return self.c.has_head(i)
|
||||
return self.safe_get(i).head != 0
|
||||
return self.c.has_head(i)
|
||||
|
||||
cdef inline int n_L(self, int i) nogil:
|
||||
self.c.n_L(i)
|
||||
return self.safe_get(i).l_kids
|
||||
return self.c.n_L(i)
|
||||
|
||||
cdef inline int n_R(self, int i) nogil:
|
||||
self.c.n_R(i)
|
||||
return self.safe_get(i).r_kids
|
||||
return self.c.n_R(i)
|
||||
|
||||
cdef inline bint stack_is_connected(self) nogil:
|
||||
return False
|
||||
|
||||
cdef inline bint entity_is_open(self) nogil:
|
||||
self.c.entity_is_open()
|
||||
if self._e_i < 1:
|
||||
return False
|
||||
return self._ents[self._e_i-1].end == -1
|
||||
return self.c.entity_is_open()
|
||||
|
||||
cdef inline int stack_depth(self) nogil:
|
||||
self.c.stack_depth()
|
||||
return self._s_i
|
||||
return self.c.stack_depth()
|
||||
|
||||
cdef inline int buffer_length(self) nogil:
|
||||
self.c.buffer_length()
|
||||
if self._break != -1:
|
||||
return self._break - self._b_i
|
||||
else:
|
||||
return self.length - self._b_i
|
||||
return self.c.buffer_length()
|
||||
|
||||
cdef void push(self) nogil
|
||||
cdef inline void push(self) nogil:
|
||||
self.c.push()
|
||||
|
||||
cdef void pop(self) nogil
|
||||
cdef inline void pop(self) nogil:
|
||||
self.c.pop()
|
||||
|
||||
cdef void unshift(self) nogil
|
||||
cdef inline void unshift(self) nogil:
|
||||
self.c.unshift()
|
||||
|
||||
cdef void add_arc(self, int head, int child, int label) nogil
|
||||
cdef inline void add_arc(self, int head, int child, int label) nogil:
|
||||
self.c.add_arc(head, child, label)
|
||||
|
||||
cdef void del_arc(self, int head, int child) nogil
|
||||
cdef inline void del_arc(self, int head, int child) nogil:
|
||||
self.c.del_arc(head, child)
|
||||
|
||||
cdef void open_ent(self, int label) nogil
|
||||
cdef inline void open_ent(self, int label) nogil:
|
||||
self.c.open_ent(label)
|
||||
|
||||
cdef void close_ent(self) nogil
|
||||
cdef inline void close_ent(self) nogil:
|
||||
self.c.close_ent()
|
||||
|
||||
cdef void set_ent_tag(self, int i, int ent_iob, int ent_type) nogil
|
||||
cdef inline void set_ent_tag(self, int i, int ent_iob, int ent_type) nogil:
|
||||
self.c.set_ent_tag(i, ent_iob, ent_type)
|
||||
|
||||
cdef void set_break(self, int i) nogil
|
||||
cdef inline void set_break(self, int i) nogil:
|
||||
self.c.set_break(i)
|
||||
|
||||
cdef void clone(self, StateClass src) nogil
|
||||
cdef inline void clone(self, StateClass src) nogil:
|
||||
self.c.clone(src.c)
|
||||
|
||||
cdef void fast_forward(self) nogil
|
||||
cdef inline void fast_forward(self) nogil:
|
||||
self.c.fast_forward()
|
||||
|
|
|
@ -10,34 +10,7 @@ from ..attrs cimport IS_SPACE
|
|||
cdef class StateClass:
|
||||
def __init__(self, int length):
|
||||
cdef Pool mem = Pool()
|
||||
cdef int PADDING = 5
|
||||
self._buffer = <int*>mem.alloc(length + (PADDING * 2), sizeof(int))
|
||||
self._stack = <int*>mem.alloc(length + (PADDING * 2), sizeof(int))
|
||||
self.shifted = <bint*>mem.alloc(length + (PADDING * 2), sizeof(bint))
|
||||
self._sent = <TokenC*>mem.alloc(length + (PADDING * 2), sizeof(TokenC))
|
||||
self._ents = <Entity*>mem.alloc(length + (PADDING * 2), sizeof(Entity))
|
||||
cdef int i
|
||||
for i in range(length + (PADDING * 2)):
|
||||
self._ents[i].end = -1
|
||||
self._sent[i].l_edge = i
|
||||
self._sent[i].r_edge = i
|
||||
for i in range(length, length + (PADDING * 2)):
|
||||
self._sent[i].lex = &EMPTY_LEXEME
|
||||
self._sent += PADDING
|
||||
self._ents += PADDING
|
||||
self._buffer += PADDING
|
||||
self._stack += PADDING
|
||||
self.shifted += PADDING
|
||||
self.mem = mem
|
||||
self.length = length
|
||||
self._break = -1
|
||||
self._s_i = 0
|
||||
self._b_i = 0
|
||||
self._e_i = 0
|
||||
for i in range(length):
|
||||
self._buffer[i] = i
|
||||
self._empty_token.lex = &EMPTY_LEXEME
|
||||
|
||||
|
||||
def __dealloc__(self):
|
||||
del self.c
|
||||
|
@ -50,131 +23,6 @@ cdef class StateClass:
|
|||
def queue(self):
|
||||
return {self.B(i) for i in range(self._b_i)}
|
||||
|
||||
cdef int E(self, int i) nogil:
|
||||
return self.c.E(i)
|
||||
|
||||
cdef int L(self, int i, int idx) nogil:
|
||||
return self.c.L(i, idx)
|
||||
|
||||
cdef int R(self, int i, int idx) nogil:
|
||||
return self.c.R(i, idx)
|
||||
|
||||
cdef void push(self) nogil:
|
||||
self.c.push()
|
||||
if self.B(0) != -1:
|
||||
self._stack[self._s_i] = self.B(0)
|
||||
self._s_i += 1
|
||||
self._b_i += 1
|
||||
if self._b_i > self._break:
|
||||
self._break = -1
|
||||
|
||||
cdef void pop(self) nogil:
|
||||
self.c.pop()
|
||||
if self._s_i >= 1:
|
||||
self._s_i -= 1
|
||||
|
||||
cdef void unshift(self) nogil:
|
||||
self.c.unshift()
|
||||
self._b_i -= 1
|
||||
self._buffer[self._b_i] = self.S(0)
|
||||
self._s_i -= 1
|
||||
self.shifted[self.B(0)] = True
|
||||
|
||||
cdef void fast_forward(self) nogil:
|
||||
self.c.fast_forward()
|
||||
while self.buffer_length() == 0 \
|
||||
or self.stack_depth() == 0 \
|
||||
or Lexeme.c_check_flag(self.S_(0).lex, IS_SPACE):
|
||||
if self.buffer_length() == 1 and self.stack_depth() == 0:
|
||||
self.push()
|
||||
self.pop()
|
||||
elif self.buffer_length() == 0 and self.stack_depth() == 1:
|
||||
self.pop()
|
||||
elif self.buffer_length() == 0 and self.stack_depth() >= 2:
|
||||
if self.has_head(self.S(0)):
|
||||
self.pop()
|
||||
else:
|
||||
self.unshift()
|
||||
elif (self.length - self._b_i) >= 1 and self.stack_depth() == 0:
|
||||
self.push()
|
||||
elif Lexeme.c_check_flag(self.S_(0).lex, IS_SPACE):
|
||||
self.add_arc(self.B(0), self.S(0), 0)
|
||||
self.pop()
|
||||
else:
|
||||
break
|
||||
|
||||
cdef void add_arc(self, int head, int child, int label) nogil:
|
||||
self.c.add_arc(head, child, label)
|
||||
if self.has_head(child):
|
||||
self.del_arc(self.H(child), child)
|
||||
|
||||
cdef int dist = head - child
|
||||
self._sent[child].head = dist
|
||||
self._sent[child].dep = label
|
||||
cdef int i
|
||||
if child > head:
|
||||
self._sent[head].r_kids += 1
|
||||
# Some transition systems can have a word in the buffer have a
|
||||
# rightward child, e.g. from Unshift.
|
||||
self._sent[head].r_edge = self._sent[child].r_edge
|
||||
i = 0
|
||||
while self.has_head(head) and i < self.length:
|
||||
head = self.H(head)
|
||||
self._sent[head].r_edge = self._sent[child].r_edge
|
||||
i += 1 # Guard against infinite loops
|
||||
else:
|
||||
self._sent[head].l_kids += 1
|
||||
self._sent[head].l_edge = self._sent[child].l_edge
|
||||
|
||||
cdef void del_arc(self, int h_i, int c_i) nogil:
|
||||
self.c.del_arc(h_i, c_i)
|
||||
cdef int dist = h_i - c_i
|
||||
cdef TokenC* h = &self._sent[h_i]
|
||||
if c_i > h_i:
|
||||
h.r_edge = self.R_(h_i, 2).r_edge if h.r_kids >= 2 else h_i
|
||||
h.r_kids -= 1
|
||||
else:
|
||||
h.l_edge = self.L_(h_i, 2).l_edge if h.l_kids >= 2 else h_i
|
||||
h.l_kids -= 1
|
||||
|
||||
cdef void open_ent(self, int label) nogil:
|
||||
self.c.open_ent(label)
|
||||
self._ents[self._e_i].start = self.B(0)
|
||||
self._ents[self._e_i].label = label
|
||||
self._ents[self._e_i].end = -1
|
||||
self._e_i += 1
|
||||
|
||||
cdef void close_ent(self) nogil:
|
||||
self.c.close_ent()
|
||||
# Note that we don't decrement _e_i here! We want to maintain all
|
||||
# entities, not over-write them...
|
||||
self._ents[self._e_i-1].end = self.B(0)+1
|
||||
self._sent[self.B(0)].ent_iob = 1
|
||||
|
||||
cdef void set_ent_tag(self, int i, int ent_iob, int ent_type) nogil:
|
||||
self.c.set_ent_tag(i, ent_iob, ent_type)
|
||||
if 0 <= i < self.length:
|
||||
self._sent[i].ent_iob = ent_iob
|
||||
self._sent[i].ent_type = ent_type
|
||||
|
||||
|
||||
cdef void set_break(self, int _) nogil:
|
||||
self.c.set_break(_)
|
||||
if 0 <= self.B(0) < self.length:
|
||||
self._sent[self.B(0)].sent_start = True
|
||||
self._break = self._b_i
|
||||
|
||||
cdef void clone(self, StateClass src) nogil:
|
||||
self.c.clone(src.c)
|
||||
memcpy(self._sent, src._sent, self.length * sizeof(TokenC))
|
||||
memcpy(self._stack, src._stack, self.length * sizeof(int))
|
||||
memcpy(self._buffer, src._buffer, self.length * sizeof(int))
|
||||
memcpy(self._ents, src._ents, self.length * sizeof(Entity))
|
||||
self._b_i = src._b_i
|
||||
self._s_i = src._s_i
|
||||
self._e_i = src._e_i
|
||||
self._break = src._break
|
||||
|
||||
def print_state(self, words):
|
||||
words = list(words) + ['_']
|
||||
top = words[self.S(0)] + '_%d' % self.S_(0).head
|
||||
|
|
Loading…
Reference in New Issue
Block a user