mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-31 16:07:41 +03:00 
			
		
		
		
	* Now fully proxied
This commit is contained in:
		
							parent
							
								
									7a0e3bb9c1
								
							
						
					
					
						commit
						daaad66448
					
				|  | @ -116,7 +116,7 @@ cdef bint _is_gold_root(const GoldParseC* gold, int word) nogil: | |||
| cdef class Shift: | ||||
|     @staticmethod | ||||
|     cdef bint is_valid(StateClass st, int label) nogil: | ||||
|         return st.buffer_length() >= 2 and not st.shifted[st.B(0)] and not st.B_(0).sent_start | ||||
|         return st.buffer_length() >= 2 and not st.c.shifted[st.B(0)] and not st.B_(0).sent_start | ||||
| 
 | ||||
|     @staticmethod | ||||
|     cdef int transition(StateClass st, int label) nogil: | ||||
|  | @ -214,7 +214,7 @@ cdef class RightArc: | |||
|     cdef inline weight_t move_cost(StateClass s, const GoldParseC* gold) nogil: | ||||
|         if arc_is_gold(gold, s.S(0), s.B(0)): | ||||
|             return 0 | ||||
|         elif s.shifted[s.B(0)]: | ||||
|         elif s.c.shifted[s.B(0)]: | ||||
|             return push_cost(s, gold, s.B(0)) | ||||
|         else: | ||||
|             return push_cost(s, gold, s.B(0)) + arc_cost(s, gold, s.S(0), s.B(0)) | ||||
|  | @ -378,10 +378,7 @@ cdef class ArcEager(TransitionSystem): | |||
| 
 | ||||
|     cdef int initialize_state(self, StateClass st) except -1: | ||||
|         # Ensure sent_start is set to 0 throughout | ||||
|         for i in range(st.length): | ||||
|             st._sent[i].sent_start = False | ||||
|             st._sent[i].l_edge = i | ||||
|             st._sent[i].r_edge = i | ||||
|         for i in range(st.c.length): | ||||
|             st.c._sent[i].sent_start = False | ||||
|             st.c._sent[i].l_edge = i | ||||
|             st.c._sent[i].r_edge = i | ||||
|  | @ -389,14 +386,12 @@ cdef class ArcEager(TransitionSystem): | |||
| 
 | ||||
|     cdef int finalize_state(self, StateClass st) nogil: | ||||
|         cdef int i | ||||
|         for i in range(st.length): | ||||
|             if st._sent[i].head == 0 and st._sent[i].dep == 0: | ||||
|                 st._sent[i].dep = self.root_label | ||||
|         for i in range(st.c.length): | ||||
|             if st.c._sent[i].head == 0 and st.c._sent[i].dep == 0: | ||||
|                 st.c._sent[i].dep = self.root_label | ||||
|             # If we're not using the Break transition, we segment via root-labelled | ||||
|             # arcs between the root words. | ||||
|             elif USE_ROOT_ARC_SEGMENT and st._sent[i].dep == self.root_label: | ||||
|                 st._sent[i].head = 0 | ||||
|             elif USE_ROOT_ARC_SEGMENT and st.c._sent[i].dep == self.root_label: | ||||
|                 st.c._sent[i].head = 0 | ||||
| 
 | ||||
|     cdef int set_valid(self, int* output, StateClass stcls) nogil: | ||||
|  |  | |||
|  | @ -238,7 +238,7 @@ cdef class In: | |||
|     @staticmethod | ||||
|     cdef weight_t cost(StateClass s, const GoldParseC* gold, int label) nogil: | ||||
|         move = IN | ||||
|         cdef int next_act = gold.ner[s.B(1)].move if s.B(0) < s.length else OUT | ||||
|         cdef int next_act = gold.ner[s.B(1)].move if s.B(0) < s.c.length else OUT | ||||
|         cdef int g_act = gold.ner[s.B(0)].move | ||||
|         cdef int g_tag = gold.ner[s.B(0)].label | ||||
|         cdef bint is_sunk = _entity_is_sunk(s, gold.ner) | ||||
|  |  | |||
|  | @ -130,7 +130,7 @@ cdef class Parser: | |||
|             for i in range(eg.c.nr_class): | ||||
|                 eg.c.is_valid[i] = 1 | ||||
|         self.moves.finalize_state(stcls) | ||||
|         tokens.set_parse(stcls._sent) | ||||
|         tokens.set_parse(stcls.c._sent) | ||||
|    | ||||
|     def train(self, Doc tokens, GoldParse gold): | ||||
|         self.moves.preprocess_gold(gold) | ||||
|  |  | |||
|  | @ -11,153 +11,115 @@ from ._state cimport StateC | |||
| 
 | ||||
| cdef class StateClass: | ||||
|     cdef Pool mem | ||||
|     cdef int* _stack | ||||
|     cdef int* _buffer | ||||
|     cdef bint* shifted | ||||
|     cdef StateC* c | ||||
|     cdef TokenC* _sent | ||||
|     cdef Entity* _ents | ||||
|     cdef TokenC _empty_token | ||||
|     cdef int length | ||||
|     cdef int _s_i | ||||
|     cdef int _b_i | ||||
|     cdef int _e_i | ||||
|     cdef int _break | ||||
| 
 | ||||
|     @staticmethod | ||||
|     cdef inline StateClass init(const TokenC* sent, int length): | ||||
|         cdef StateClass self = StateClass(length) | ||||
|         cdef int i | ||||
|         for i in range(length): | ||||
|             self._sent[i] = sent[i] | ||||
|             self._buffer[i] = i | ||||
|         for i in range(length, length + 5): | ||||
|             self._sent[i].lex = &EMPTY_LEXEME | ||||
| 
 | ||||
|         self.c = new StateC(sent, length) | ||||
|         return self | ||||
| 
 | ||||
|     cdef inline int S(self, int i) nogil: | ||||
|         self.c.S(i) | ||||
|         if i >= self._s_i: | ||||
|             return -1 | ||||
|         return self._stack[self._s_i - (i+1)] | ||||
|         return self.c.S(i) | ||||
| 
 | ||||
|     cdef inline int B(self, int i) nogil: | ||||
|         self.c.B(i) | ||||
|         if (i + self._b_i) >= self.length: | ||||
|             return -1 | ||||
|         return self._buffer[self._b_i + i] | ||||
|         return self.c.B(i) | ||||
| 
 | ||||
|     cdef inline const TokenC* S_(self, int i) nogil: | ||||
|         self.c.S_(i) | ||||
|         return self.safe_get(self.S(i)) | ||||
|         return self.c.S_(i) | ||||
| 
 | ||||
|     cdef inline const TokenC* B_(self, int i) nogil: | ||||
|         self.c.B_(i) | ||||
|         return self.safe_get(self.B(i)) | ||||
|         return self.c.B_(i) | ||||
| 
 | ||||
|     cdef inline const TokenC* H_(self, int i) nogil: | ||||
|         self.c.H_(i) | ||||
|         return self.safe_get(self.H(i)) | ||||
|         return self.c.H_(i) | ||||
| 
 | ||||
|     cdef inline const TokenC* E_(self, int i) nogil: | ||||
|         self.c.E_(i) | ||||
|         return self.safe_get(self.E(i)) | ||||
|         return self.c.E_(i) | ||||
| 
 | ||||
|     cdef inline const TokenC* L_(self, int i, int idx) nogil: | ||||
|         self.c.L_(i, idx) | ||||
|         return self.safe_get(self.L(i, idx)) | ||||
|         return self.c.L_(i, idx) | ||||
| 
 | ||||
|     cdef inline const TokenC* R_(self, int i, int idx) nogil: | ||||
|         self.c.R_(i, idx) | ||||
|         return self.safe_get(self.R(i, idx)) | ||||
|         return self.c.R_(i, idx) | ||||
| 
 | ||||
|     cdef inline const TokenC* safe_get(self, int i) nogil: | ||||
|         self.c.safe_get(i) | ||||
|         if i < 0 or i >= self.length: | ||||
|             return &self._empty_token | ||||
|         else: | ||||
|             return &self._sent[i] | ||||
|         return self.c.safe_get(i) | ||||
| 
 | ||||
|     cdef inline int H(self, int i) nogil: | ||||
|         return self.c.H(i) | ||||
|         if i < 0 or i >= self.length: | ||||
|             return -1 | ||||
|         return self._sent[i].head + i | ||||
|      | ||||
|     cdef int E(self, int i) nogil | ||||
|     cdef inline int E(self, int i) nogil: | ||||
|         return self.c.E(i) | ||||
| 
 | ||||
|     cdef int R(self, int i, int idx) nogil | ||||
|     cdef inline int L(self, int i, int idx) nogil: | ||||
|         return self.c.L(i, idx) | ||||
| 
 | ||||
|     cdef int L(self, int i, int idx) nogil | ||||
|     cdef inline int R(self, int i, int idx) nogil: | ||||
|         return self.c.R(i, idx) | ||||
| 
 | ||||
|     cdef inline bint empty(self) nogil: | ||||
|         self.c.empty() | ||||
|         return self._s_i <= 0 | ||||
|         return self.c.empty() | ||||
| 
 | ||||
|     cdef inline bint eol(self) nogil: | ||||
|         self.c.eol() | ||||
|         return self.buffer_length() == 0 | ||||
|         return self.c.eol() | ||||
| 
 | ||||
|     cdef inline bint at_break(self) nogil: | ||||
|         self.c.at_break() | ||||
|         return self._break != -1 | ||||
|         return self.c.at_break() | ||||
| 
 | ||||
|     cdef inline bint is_final(self) nogil: | ||||
|         self.c.is_final() | ||||
|         return self.stack_depth() <= 0 and self._b_i >= self.length | ||||
|         return self.c.is_final() | ||||
| 
 | ||||
|     cdef inline bint has_head(self, int i) nogil: | ||||
|         #return self.c.has_head(i) | ||||
|         return self.safe_get(i).head != 0 | ||||
|         return self.c.has_head(i) | ||||
| 
 | ||||
|     cdef inline int n_L(self, int i) nogil: | ||||
|         self.c.n_L(i) | ||||
|         return self.safe_get(i).l_kids | ||||
|         return self.c.n_L(i) | ||||
| 
 | ||||
|     cdef inline int n_R(self, int i) nogil: | ||||
|         self.c.n_R(i) | ||||
|         return self.safe_get(i).r_kids | ||||
|         return self.c.n_R(i) | ||||
| 
 | ||||
|     cdef inline bint stack_is_connected(self) nogil: | ||||
|         return False | ||||
| 
 | ||||
|     cdef inline bint entity_is_open(self) nogil: | ||||
|         self.c.entity_is_open() | ||||
|         if self._e_i < 1: | ||||
|             return False | ||||
|         return self._ents[self._e_i-1].end == -1 | ||||
|         return self.c.entity_is_open() | ||||
| 
 | ||||
|     cdef inline int stack_depth(self) nogil: | ||||
|         self.c.stack_depth() | ||||
|         return self._s_i | ||||
|         return self.c.stack_depth() | ||||
| 
 | ||||
|     cdef inline int buffer_length(self) nogil: | ||||
|         self.c.buffer_length() | ||||
|         if self._break != -1: | ||||
|             return self._break - self._b_i | ||||
|         else: | ||||
|             return self.length - self._b_i | ||||
|         return self.c.buffer_length() | ||||
| 
 | ||||
|     cdef void push(self) nogil | ||||
|     cdef inline void push(self) nogil: | ||||
|         self.c.push() | ||||
| 
 | ||||
|     cdef void pop(self) nogil | ||||
|     cdef inline void pop(self) nogil: | ||||
|         self.c.pop() | ||||
|      | ||||
|     cdef void unshift(self) nogil | ||||
|     cdef inline void unshift(self) nogil: | ||||
|         self.c.unshift() | ||||
| 
 | ||||
|     cdef void add_arc(self, int head, int child, int label) nogil | ||||
|     cdef inline void add_arc(self, int head, int child, int label) nogil: | ||||
|         self.c.add_arc(head, child, label) | ||||
|      | ||||
|     cdef void del_arc(self, int head, int child) nogil | ||||
|     cdef inline void del_arc(self, int head, int child) nogil: | ||||
|         self.c.del_arc(head, child) | ||||
| 
 | ||||
|     cdef void open_ent(self, int label) nogil | ||||
|     cdef inline void open_ent(self, int label) nogil: | ||||
|         self.c.open_ent(label) | ||||
|      | ||||
|     cdef void close_ent(self) nogil | ||||
|     cdef inline void close_ent(self) nogil: | ||||
|         self.c.close_ent() | ||||
|      | ||||
|     cdef void set_ent_tag(self, int i, int ent_iob, int ent_type) nogil | ||||
|     cdef inline void set_ent_tag(self, int i, int ent_iob, int ent_type) nogil: | ||||
|         self.c.set_ent_tag(i, ent_iob, ent_type) | ||||
| 
 | ||||
|     cdef void set_break(self, int i) nogil | ||||
|     cdef inline void set_break(self, int i) nogil: | ||||
|         self.c.set_break(i) | ||||
| 
 | ||||
|     cdef void clone(self, StateClass src) nogil | ||||
|     cdef inline void clone(self, StateClass src) nogil: | ||||
|         self.c.clone(src.c) | ||||
| 
 | ||||
|     cdef void fast_forward(self) nogil | ||||
|     cdef inline void fast_forward(self) nogil: | ||||
|         self.c.fast_forward() | ||||
|  |  | |||
|  | @ -10,34 +10,7 @@ from ..attrs cimport IS_SPACE | |||
| cdef class StateClass: | ||||
|     def __init__(self, int length): | ||||
|         cdef Pool mem = Pool() | ||||
|         cdef int PADDING = 5 | ||||
|         self._buffer = <int*>mem.alloc(length + (PADDING * 2), sizeof(int)) | ||||
|         self._stack = <int*>mem.alloc(length + (PADDING * 2), sizeof(int)) | ||||
|         self.shifted = <bint*>mem.alloc(length + (PADDING * 2), sizeof(bint)) | ||||
|         self._sent = <TokenC*>mem.alloc(length + (PADDING * 2), sizeof(TokenC)) | ||||
|         self._ents = <Entity*>mem.alloc(length + (PADDING * 2), sizeof(Entity)) | ||||
|         cdef int i | ||||
|         for i in range(length + (PADDING * 2)): | ||||
|             self._ents[i].end = -1 | ||||
|             self._sent[i].l_edge = i | ||||
|             self._sent[i].r_edge = i | ||||
|         for i in range(length, length + (PADDING * 2)): | ||||
|             self._sent[i].lex = &EMPTY_LEXEME | ||||
|         self._sent += PADDING | ||||
|         self._ents += PADDING | ||||
|         self._buffer += PADDING | ||||
|         self._stack += PADDING | ||||
|         self.shifted += PADDING | ||||
|         self.mem = mem | ||||
|         self.length = length | ||||
|         self._break = -1 | ||||
|         self._s_i = 0 | ||||
|         self._b_i = 0 | ||||
|         self._e_i = 0 | ||||
|         for i in range(length): | ||||
|             self._buffer[i] = i | ||||
|         self._empty_token.lex = &EMPTY_LEXEME | ||||
| 
 | ||||
| 
 | ||||
|     def __dealloc__(self): | ||||
|         del self.c | ||||
|  | @ -50,131 +23,6 @@ cdef class StateClass: | |||
|     def queue(self): | ||||
|         return {self.B(i) for i in range(self._b_i)} | ||||
| 
 | ||||
|     cdef int E(self, int i) nogil: | ||||
|         return self.c.E(i) | ||||
| 
 | ||||
|     cdef int L(self, int i, int idx) nogil: | ||||
|         return self.c.L(i, idx) | ||||
| 
 | ||||
|     cdef int R(self, int i, int idx) nogil: | ||||
|         return self.c.R(i, idx) | ||||
| 
 | ||||
|     cdef void push(self) nogil: | ||||
|         self.c.push() | ||||
|         if self.B(0) != -1: | ||||
|             self._stack[self._s_i] = self.B(0) | ||||
|         self._s_i += 1 | ||||
|         self._b_i += 1 | ||||
|         if self._b_i > self._break: | ||||
|             self._break = -1 | ||||
| 
 | ||||
|     cdef void pop(self) nogil: | ||||
|         self.c.pop() | ||||
|         if self._s_i >= 1: | ||||
|             self._s_i -= 1 | ||||
| 
 | ||||
|     cdef void unshift(self) nogil: | ||||
|         self.c.unshift() | ||||
|         self._b_i -= 1 | ||||
|         self._buffer[self._b_i] = self.S(0) | ||||
|         self._s_i -= 1 | ||||
|         self.shifted[self.B(0)] = True | ||||
| 
 | ||||
|     cdef void fast_forward(self) nogil: | ||||
|         self.c.fast_forward() | ||||
|         while self.buffer_length() == 0 \ | ||||
|         or self.stack_depth() == 0 \ | ||||
|         or Lexeme.c_check_flag(self.S_(0).lex, IS_SPACE): | ||||
|             if self.buffer_length() == 1 and self.stack_depth() == 0: | ||||
|                 self.push() | ||||
|                 self.pop() | ||||
|             elif self.buffer_length() == 0 and self.stack_depth() == 1: | ||||
|                 self.pop() | ||||
|             elif self.buffer_length() == 0 and self.stack_depth() >= 2: | ||||
|                 if self.has_head(self.S(0)): | ||||
|                     self.pop() | ||||
|                 else: | ||||
|                     self.unshift() | ||||
|             elif (self.length - self._b_i) >= 1 and self.stack_depth() == 0: | ||||
|                 self.push() | ||||
|             elif Lexeme.c_check_flag(self.S_(0).lex, IS_SPACE): | ||||
|                 self.add_arc(self.B(0), self.S(0), 0) | ||||
|                 self.pop() | ||||
|             else: | ||||
|                 break | ||||
| 
 | ||||
|     cdef void add_arc(self, int head, int child, int label) nogil: | ||||
|         self.c.add_arc(head, child, label) | ||||
|         if self.has_head(child): | ||||
|             self.del_arc(self.H(child), child) | ||||
| 
 | ||||
|         cdef int dist = head - child | ||||
|         self._sent[child].head = dist | ||||
|         self._sent[child].dep = label | ||||
|         cdef int i | ||||
|         if child > head: | ||||
|             self._sent[head].r_kids += 1 | ||||
|             # Some transition systems can have a word in the buffer have a | ||||
|             # rightward child, e.g. from Unshift. | ||||
|             self._sent[head].r_edge = self._sent[child].r_edge | ||||
|             i = 0 | ||||
|             while self.has_head(head) and i < self.length: | ||||
|                 head = self.H(head) | ||||
|                 self._sent[head].r_edge = self._sent[child].r_edge | ||||
|                 i += 1 # Guard against infinite loops | ||||
|         else: | ||||
|             self._sent[head].l_kids += 1 | ||||
|             self._sent[head].l_edge = self._sent[child].l_edge | ||||
| 
 | ||||
|     cdef void del_arc(self, int h_i, int c_i) nogil: | ||||
|         self.c.del_arc(h_i, c_i) | ||||
|         cdef int dist = h_i - c_i | ||||
|         cdef TokenC* h = &self._sent[h_i] | ||||
|         if c_i > h_i: | ||||
|             h.r_edge = self.R_(h_i, 2).r_edge if h.r_kids >= 2 else h_i | ||||
|             h.r_kids -= 1 | ||||
|         else: | ||||
|             h.l_edge = self.L_(h_i, 2).l_edge if h.l_kids >= 2 else h_i | ||||
|             h.l_kids -= 1 | ||||
| 
 | ||||
|     cdef void open_ent(self, int label) nogil: | ||||
|         self.c.open_ent(label) | ||||
|         self._ents[self._e_i].start = self.B(0) | ||||
|         self._ents[self._e_i].label = label | ||||
|         self._ents[self._e_i].end = -1 | ||||
|         self._e_i += 1 | ||||
| 
 | ||||
|     cdef void close_ent(self) nogil: | ||||
|         self.c.close_ent() | ||||
|         # Note that we don't decrement _e_i here! We want to maintain all | ||||
|         # entities, not over-write them... | ||||
|         self._ents[self._e_i-1].end = self.B(0)+1 | ||||
|         self._sent[self.B(0)].ent_iob = 1 | ||||
| 
 | ||||
|     cdef void set_ent_tag(self, int i, int ent_iob, int ent_type) nogil: | ||||
|         self.c.set_ent_tag(i, ent_iob, ent_type) | ||||
|         if 0 <= i < self.length: | ||||
|             self._sent[i].ent_iob = ent_iob | ||||
|             self._sent[i].ent_type = ent_type | ||||
| 
 | ||||
| 
 | ||||
|     cdef void set_break(self, int _) nogil: | ||||
|         self.c.set_break(_) | ||||
|         if 0 <= self.B(0) < self.length:  | ||||
|             self._sent[self.B(0)].sent_start = True | ||||
|             self._break = self._b_i | ||||
| 
 | ||||
|     cdef void clone(self, StateClass src) nogil: | ||||
|         self.c.clone(src.c) | ||||
|         memcpy(self._sent, src._sent, self.length * sizeof(TokenC)) | ||||
|         memcpy(self._stack, src._stack, self.length * sizeof(int)) | ||||
|         memcpy(self._buffer, src._buffer, self.length * sizeof(int)) | ||||
|         memcpy(self._ents, src._ents, self.length * sizeof(Entity)) | ||||
|         self._b_i = src._b_i | ||||
|         self._s_i = src._s_i | ||||
|         self._e_i = src._e_i | ||||
|         self._break = src._break | ||||
| 
 | ||||
|     def print_state(self, words): | ||||
|         words = list(words) + ['_'] | ||||
|         top = words[self.S(0)] + '_%d' % self.S_(0).head | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user