mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-30 23:47:31 +03:00 
			
		
		
		
	* Prepare to switch to using state class, instead of state struct
This commit is contained in:
		
							parent
							
								
									2b9629ed62
								
							
						
					
					
						commit
						0895d454fb
					
				|  | @ -4,6 +4,7 @@ from ._state cimport State | |||
| 
 | ||||
| 
 | ||||
| cdef int fill_context(atom_t* context, State* state) except -1 | ||||
| cdef int _new_fill_context(atom_t* context, State* state) except -1 | ||||
| # Context elements | ||||
| 
 | ||||
| # Ensure each token's attributes are listed: w, p, c, c6, c4. The order | ||||
|  |  | |||
|  | @ -20,6 +20,11 @@ from ._state cimport has_head, get_left, get_right | |||
| from ._state cimport count_left_kids, count_right_kids | ||||
| 
 | ||||
| 
 | ||||
| from .stateclass cimport StateClass | ||||
| 
 | ||||
| from cymem.cymem cimport Pool | ||||
| 
 | ||||
| 
 | ||||
| cdef inline void fill_token(atom_t* context, const TokenC* token) nogil: | ||||
|     if token is NULL: | ||||
|         context[0] = 0 | ||||
|  | @ -60,6 +65,53 @@ cdef inline void fill_token(atom_t* context, const TokenC* token) nogil: | |||
|         context[10] = token.ent_iob | ||||
|         context[11] = token.ent_type | ||||
| 
 | ||||
| cdef int _new_fill_context(atom_t* ctxt, State* state) except -1: | ||||
|     # Fill the feature-context array `ctxt` from `state`, going through a | ||||
|     # temporary StateClass built from the struct rather than reading the | ||||
|     # struct directly (except for the E0/E1 slots, see TODO below). | ||||
|     # | ||||
|     # Take care to fill every element of context! | ||||
|     # We could memset, but this makes it very easy to have broken features that | ||||
|     # make almost no impact on accuracy. If instead they're unset, the impact | ||||
|     # tends to be dramatic, so we get an obvious regression to fix... | ||||
|     cdef StateClass st = StateClass(state.sent_len) | ||||
|     st.from_struct(state) | ||||
|     # Token slots. The *_ accessors go through safe_get, so a missing stack | ||||
|     # or buffer position yields the StateClass's empty token, not NULL. | ||||
|     fill_token(&ctxt[S2w], st.S_(2)) | ||||
|     fill_token(&ctxt[S1w], st.S_(1)) | ||||
|     fill_token(&ctxt[S1rw], st.R_(st.S(1), 1)) | ||||
|     fill_token(&ctxt[S0lw], st.L_(st.S(0), 1)) | ||||
|     fill_token(&ctxt[S0l2w], st.L_(st.S(0), 2)) | ||||
|     fill_token(&ctxt[S0w], st.S_(0)) | ||||
|     fill_token(&ctxt[S0r2w], st.R_(st.S(0), 2)) | ||||
|     fill_token(&ctxt[S0rw], st.R_(st.S(0), 1)) | ||||
|     fill_token(&ctxt[N0lw], st.L_(st.B(0), 1)) | ||||
|     fill_token(&ctxt[N0l2w], st.L_(st.B(0), 2)) | ||||
|     fill_token(&ctxt[N0w], st.B_(0)) | ||||
|     fill_token(&ctxt[N1w], st.B_(1)) | ||||
|     fill_token(&ctxt[N2w], st.B_(2)) | ||||
|     # P1/P2: the one and two tokens immediately before B(0) in the sentence. | ||||
|     fill_token(&ctxt[P1w], st.safe_get(st.B(0)-1)) | ||||
|     fill_token(&ctxt[P2w], st.safe_get(st.B(0)-2)) | ||||
| 
 | ||||
|     # TODO | ||||
|     # E0/E1 are still read from the old State struct via get_e0/get_e1. | ||||
|     fill_token(&ctxt[E0w], get_e0(state)) | ||||
|     fill_token(&ctxt[E1w], get_e1(state)) | ||||
| 
 | ||||
|     if st.stack_depth() >= 1 and not st.eol(): | ||||
|         ctxt[dist] = min(st.S(0) - st.B(0), 5) # TODO: This is backwards!! | ||||
|     else: | ||||
|         ctxt[dist] = 0 | ||||
|     # Child counts for N0, S0 and S1, each capped at 5. | ||||
|     ctxt[N0lv] = min(st.n_L(st.B(0)), 5) | ||||
|     ctxt[S0lv] = min(st.n_L(st.S(0)), 5) | ||||
|     ctxt[S0rv] = min(st.n_R(st.S(0)), 5) | ||||
|     ctxt[S1lv] = min(st.n_L(st.S(1)), 5) | ||||
|     ctxt[S1rv] = min(st.n_R(st.S(1)), 5) | ||||
| 
 | ||||
|     # has_head flags: 0 when the stack is too shallow for that slot, | ||||
|     # otherwise has_head(...) + 1. | ||||
|     ctxt[S0_has_head] = 0 | ||||
|     ctxt[S1_has_head] = 0 | ||||
|     ctxt[S2_has_head] = 0 | ||||
|     if st.stack_depth() >= 1: | ||||
|         ctxt[S0_has_head] = st.has_head(st.S(0)) + 1 | ||||
|         if st.stack_depth() >= 2: | ||||
|             ctxt[S1_has_head] = st.has_head(st.S(1)) + 1 | ||||
|             if st.stack_depth() >= 3: | ||||
|                 ctxt[S2_has_head] = st.has_head(st.S(2)) + 1 | ||||
| 
 | ||||
| 
 | ||||
| cdef int fill_context(atom_t* context, State* state) except -1: | ||||
|     # Take care to fill every element of context! | ||||
|  |  | |||
|  | @ -115,29 +115,33 @@ cdef bint has_head(const TokenC* t) nogil: | |||
| 
 | ||||
| cdef const TokenC* get_left(const State* s, const TokenC* head, const int idx) nogil: | ||||
|     return _new_get_left(s, head, idx) | ||||
|     #cdef uint32_t kids = head.l_kids | ||||
|     #if kids == 0: | ||||
|     #    return NULL | ||||
|     #cdef int offset = _nth_significant_bit(kids, idx) | ||||
|     #cdef const TokenC* child = head - offset | ||||
|     #if child >= s.sent: | ||||
|     #    return child | ||||
|     ##else: | ||||
|     #    return NULL | ||||
| 
 | ||||
| """ | ||||
|     cdef uint32_t kids = head.l_kids | ||||
|     if kids == 0: | ||||
|         return NULL | ||||
|     cdef int offset = _nth_significant_bit(kids, idx) | ||||
|     cdef const TokenC* child = head - offset | ||||
|     if child >= s.sent: | ||||
|         return child | ||||
|     else: | ||||
|         return NULL | ||||
| """ | ||||
| 
 | ||||
| cdef const TokenC* get_right(const State* s, const TokenC* head, const int idx) nogil: | ||||
|     return _new_get_right(s, head, idx) | ||||
|     #cdef uint32_t kids = head.r_kids | ||||
|     #if kids == 0: | ||||
|     #    return NULL | ||||
|     #cdef int offset = _nth_significant_bit(kids, idx) | ||||
|     #cdef const TokenC* child = head + offset | ||||
|     #if child < (s.sent + s.sent_len): | ||||
|     #    return child | ||||
|     #else: | ||||
|     #    return NULL | ||||
| 
 | ||||
| """ | ||||
|     cdef uint32_t kids = head.r_kids | ||||
|     if kids == 0: | ||||
|         return NULL | ||||
|     cdef int offset = _nth_significant_bit(kids, idx) | ||||
|     cdef const TokenC* child = head + offset | ||||
|     if child < (s.sent + s.sent_len): | ||||
|         return child | ||||
|     else: | ||||
|         return NULL | ||||
| """ | ||||
| 
 | ||||
| cdef int count_left_kids(const TokenC* head) nogil: | ||||
|     return _popcount(head.l_kids) | ||||
|  |  | |||
|  | @ -6,6 +6,5 @@ from thinc.typedefs cimport weight_t | |||
| from ._state cimport State | ||||
| from .transition_system cimport TransitionSystem, Transition | ||||
| 
 | ||||
| 
 | ||||
| cdef class ArcEager(TransitionSystem): | ||||
|     pass | ||||
|  |  | |||
|  | @ -22,7 +22,7 @@ from libc.stdint cimport uint32_t | |||
| from libc.string cimport memcpy | ||||
| 
 | ||||
| from cymem.cymem cimport Pool | ||||
| from ..stateclass cimport StateClass | ||||
| from .stateclass cimport StateClass | ||||
| 
 | ||||
| 
 | ||||
| DEF NON_MONOTONIC = True | ||||
|  | @ -59,32 +59,63 @@ MOVE_NAMES[ADJUST] = 'A' | |||
| # Helper functions for the arc-eager oracle | ||||
| 
 | ||||
| cdef int push_cost(const State* st, const GoldParseC* gold, int target) except -1: | ||||
|     # When we push a word, we can't make arcs to or from the stack. So, we lose | ||||
|     # any of those arcs. | ||||
|     cdef StateClass stcls = StateClass(st.sent_len) | ||||
|     stcls.from_struct(st) | ||||
|     cdef int cost = 0 | ||||
|     cost += head_in_stack(st, target, gold.heads) | ||||
|     cost += children_in_stack(st, target, gold.heads) | ||||
|     # If we can Break, we shouldn't push | ||||
|     cdef int i, S_i | ||||
|     for i in range(stcls.stack_depth()): | ||||
|         S_i = stcls.S(i) | ||||
|         if gold.heads[target] == S_i: | ||||
|             cost += 1 | ||||
|         if gold.heads[S_i] == target and (NON_MONOTONIC or not stcls.has_head(S_i)): | ||||
|             cost += 1 | ||||
|     cost += Break.is_valid(st, -1) and Break.move_cost(st, gold) == 0 | ||||
|     return cost | ||||
|     # When we push a word, we can't make arcs to or from the stack. So, we lose | ||||
|     # any of those arcs. | ||||
|     #cost += head_in_stack(st, target, gold.heads) | ||||
|     #cost += children_in_stack(st, target, gold.heads) | ||||
|     # If we can Break, we shouldn't push | ||||
|     #cost += Break.is_valid(st, -1) and Break.move_cost(st, gold) == 0 | ||||
|     #return cost | ||||
| 
 | ||||
| 
 | ||||
| cdef int pop_cost(const State* st, const GoldParseC* gold, int target) except -1: | ||||
|     cdef StateClass stcls = StateClass(st.sent_len) | ||||
|     stcls.from_struct(st) | ||||
|     cdef int cost = 0 | ||||
|     cost += children_in_buffer(st, target, gold.heads) | ||||
|     cost += head_in_buffer(st, target, gold.heads) | ||||
|     cdef int i, B_i | ||||
|     for i in range(stcls.buffer_length()): | ||||
|         B_i = stcls.B(i) | ||||
|         cost += gold.heads[B_i] == target | ||||
|         cost += gold.heads[target] == B_i | ||||
|         if gold.heads[B_i] == B_i or gold.heads[B_i] < target: | ||||
|             break | ||||
|     return cost | ||||
|     #cost += children_in_buffer(st, target, gold.heads) | ||||
|     #cost += head_in_buffer(st, target, gold.heads) | ||||
|     #return cost | ||||
| 
 | ||||
| 
 | ||||
| cdef int arc_cost(const State* st, const GoldParseC* gold, int head, int child) except -1: | ||||
|     cdef StateClass stcls = StateClass(st.sent_len) | ||||
|     stcls.from_struct(st) | ||||
|     if arc_is_gold(gold, head, child): | ||||
|         return 0 | ||||
|     elif (child + st.sent[child].head) == gold.heads[child]: | ||||
|     elif stcls.H(child) == gold.heads[child]: | ||||
|         return 1 | ||||
|     elif gold.heads[child] >= st.i: | ||||
|     elif gold.heads[child] >= stcls.B(0): | ||||
|         return 1 | ||||
|     else: | ||||
|         return 0 | ||||
|     #if arc_is_gold(gold, head, child): | ||||
|     #    return 0 | ||||
|     #elif (child + st.sent[child].head) == gold.heads[child]: | ||||
|     #    return 1 | ||||
|     #elif gold.heads[child] >= st.i: | ||||
|     #    return 1 | ||||
|     #else: | ||||
|     #    return 0 | ||||
| 
 | ||||
| 
 | ||||
| cdef bint arc_is_gold(const GoldParseC* gold, int head, int child) except -1: | ||||
|  | @ -122,7 +153,6 @@ cdef class Shift: | |||
|     cdef bint _new_is_valid(StateClass st, int label) except -1: | ||||
|         return not st.eol() | ||||
| 
 | ||||
| 
 | ||||
|     @staticmethod | ||||
|     cdef int transition(State* state, int label) except -1: | ||||
|         # Set the dep label, in case we need it after we reduce | ||||
|  | @ -596,14 +626,17 @@ cdef class ArcEager(TransitionSystem): | |||
|                 state.sent[i].dep = root_label | ||||
| 
 | ||||
|     cdef int set_valid(self, bint* output, const State* state) except -1: | ||||
|         raise Exception | ||||
|         cdef StateClass stcls = StateClass(state.sent_len) | ||||
|         stcls.from_struct(state) | ||||
|         cdef bint[N_MOVES] is_valid | ||||
|         is_valid[SHIFT] = Shift.is_valid(state, -1) | ||||
|         is_valid[REDUCE] = Reduce.is_valid(state, -1) | ||||
|         is_valid[LEFT] = LeftArc.is_valid(state, -1) | ||||
|         is_valid[RIGHT] = RightArc.is_valid(state, -1) | ||||
|         is_valid[BREAK] = Break.is_valid(state, -1) | ||||
|         is_valid[CONSTITUENT] = Constituent.is_valid(state, -1) | ||||
|         is_valid[ADJUST] = Adjust.is_valid(state, -1) | ||||
|         is_valid[SHIFT] = Shift._new_is_valid(stcls, -1) | ||||
|         is_valid[REDUCE] = Reduce._new_is_valid(stcls, -1) | ||||
|         is_valid[LEFT] = LeftArc._new_is_valid(stcls, -1) | ||||
|         is_valid[RIGHT] = RightArc._new_is_valid(stcls, -1) | ||||
|         is_valid[BREAK] = Break._new_is_valid(stcls, -1) | ||||
|         is_valid[CONSTITUENT] = False # Constituent.is_valid(state, -1) | ||||
|         is_valid[ADJUST] = False # Adjust.is_valid(state, -1) | ||||
|         cdef int i | ||||
|         for i in range(self.n_moves): | ||||
|             output[i] = is_valid[self.c[i].move] | ||||
|  | @ -641,10 +674,10 @@ cdef class ArcEager(TransitionSystem): | |||
|             output[i] = move_costs[move] + label_cost_funcs[move](s, &gold.c, label) | ||||
| 
 | ||||
|     cdef Transition best_valid(self, const weight_t* scores, const State* s) except *: | ||||
|         cdef Pool mem = Pool() | ||||
|         cdef StateClass stcls = StateClass.from_struct(mem, s) | ||||
|         assert s is not NULL | ||||
|         cdef StateClass stcls = StateClass(s.sent_len) | ||||
|         stcls.from_struct(s) | ||||
|         cdef bint[N_MOVES] is_valid | ||||
|         #is_valid[SHIFT] = Shift.is_valid(s, -1) | ||||
|         is_valid[SHIFT] = Shift._new_is_valid(stcls, -1) | ||||
|         is_valid[REDUCE] = Reduce._new_is_valid(stcls, -1) | ||||
|         is_valid[LEFT] = LeftArc._new_is_valid(stcls, -1) | ||||
|  |  | |||
|  | @ -1,4 +1,5 @@ | |||
| # cython: profile=True | ||||
| # cython: experimental_cpp_class_def=True | ||||
| """ | ||||
| MALT-style dependency parser | ||||
| """ | ||||
|  | @ -38,7 +39,9 @@ from ._state cimport State, new_state, copy_state, is_final, push_stack, get_lef | |||
| from ..gold cimport GoldParse | ||||
| 
 | ||||
| from . import _parse_features | ||||
| from ._parse_features cimport fill_context, CONTEXT_SIZE | ||||
| from ._parse_features cimport CONTEXT_SIZE | ||||
| from ._parse_features cimport _new_fill_context as fill_context | ||||
| #from ._parse_features cimport fill_context | ||||
| 
 | ||||
| 
 | ||||
| DEBUG = False | ||||
|  |  | |||
|  | @ -2,14 +2,11 @@ from libc.string cimport memcpy, memset | |||
| 
 | ||||
| from cymem.cymem cimport Pool | ||||
| 
 | ||||
| from structs cimport TokenC | ||||
| from ..structs cimport TokenC | ||||
| 
 | ||||
| from .syntax._state cimport State | ||||
| from ._state cimport State | ||||
| 
 | ||||
| from .vocab cimport EMPTY_LEXEME | ||||
| 
 | ||||
| 
 | ||||
| cdef TokenC EMPTY_TOKEN | ||||
| from ..vocab cimport EMPTY_LEXEME | ||||
| 
 | ||||
| 
 | ||||
| cdef class StateClass: | ||||
|  | @ -17,44 +14,12 @@ cdef class StateClass: | |||
|     cdef int* _stack | ||||
|     cdef int* _buffer | ||||
|     cdef TokenC* _sent | ||||
|     cdef TokenC _empty_token | ||||
|     cdef int length | ||||
|     cdef int _s_i | ||||
|     cdef int _b_i | ||||
| 
 | ||||
|     @staticmethod | ||||
|     cdef inline StateClass init(const TokenC* sent, int length): | ||||
|         cdef StateClass self = StateClass(length) | ||||
|         memcpy(self._sent, sent, sizeof(TokenC*) * length) | ||||
|         return self | ||||
| 
 | ||||
|     @staticmethod | ||||
|     cdef inline StateClass from_struct(Pool mem, const State* state): | ||||
|         cdef StateClass self = StateClass.init(state.sent, state.sent_len) | ||||
|         memcpy(self._stack, state.stack - state.stack_len, sizeof(int) * state.stack_len) | ||||
|         self._s_i = state.stack_len - 1 | ||||
|         self._b_i = state.i | ||||
|         return self | ||||
| 
 | ||||
|     cdef inline const TokenC* S_(self, int i) nogil: | ||||
|         return self.safe_get(self.S(i)) | ||||
| 
 | ||||
|     cdef inline const TokenC* B_(self, int i) nogil: | ||||
|         return self.safe_get(self.B(i)) | ||||
| 
 | ||||
|     cdef inline const TokenC* H_(self, int i) nogil: | ||||
|         return self.safe_get(self.B(i)) | ||||
| 
 | ||||
|     cdef inline const TokenC* L_(self, int i, int idx) nogil: | ||||
|         return self.safe_get(self.L(i, idx)) | ||||
| 
 | ||||
|     cdef inline const TokenC* R_(self, int i, int idx) nogil: | ||||
|         return self.safe_get(self.R(i, idx)) | ||||
| 
 | ||||
|     cdef inline const TokenC* safe_get(self, int i) nogil: | ||||
|         if 0 >= i >= self.length: | ||||
|             return &EMPTY_TOKEN | ||||
|         else: | ||||
|             return self._sent | ||||
|     cdef int from_struct(self, const State* state) except -1 | ||||
| 
 | ||||
|     cdef int S(self, int i) nogil | ||||
|     cdef int B(self, int i) nogil | ||||
|  | @ -64,6 +29,16 @@ cdef class StateClass: | |||
|     cdef int L(self, int i, int idx) nogil | ||||
|     cdef int R(self, int i, int idx) nogil | ||||
| 
 | ||||
|     cdef const TokenC* S_(self, int i) nogil | ||||
|     cdef const TokenC* B_(self, int i) nogil | ||||
| 
 | ||||
|     cdef const TokenC* H_(self, int i) nogil | ||||
| 
 | ||||
|     cdef const TokenC* L_(self, int i, int idx) nogil | ||||
|     cdef const TokenC* R_(self, int i, int idx) nogil | ||||
| 
 | ||||
|     cdef const TokenC* safe_get(self, int i) nogil | ||||
| 
 | ||||
|     cdef bint empty(self) nogil | ||||
| 
 | ||||
|     cdef bint eol(self) nogil | ||||
|  | @ -72,6 +47,10 @@ cdef class StateClass: | |||
| 
 | ||||
|     cdef bint has_head(self, int i) nogil | ||||
| 
 | ||||
|     cdef int n_L(self, int i) nogil | ||||
| 
 | ||||
|     cdef int n_R(self, int i) nogil | ||||
| 
 | ||||
|     cdef bint stack_is_connected(self) nogil | ||||
| 
 | ||||
|     cdef int stack_depth(self) nogil | ||||
|  |  | |||
|  | @ -1,24 +1,33 @@ | |||
| from libc.string cimport memcpy, memset | ||||
| from libc.stdint cimport uint32_t | ||||
| from .vocab cimport EMPTY_LEXEME | ||||
| 
 | ||||
| 
 | ||||
| memset(&EMPTY_TOKEN, 0, sizeof(TokenC)) | ||||
| EMPTY_TOKEN.lex = &EMPTY_LEXEME | ||||
| from ..vocab cimport EMPTY_LEXEME | ||||
| 
 | ||||
| 
 | ||||
| cdef class StateClass: | ||||
|     def __cinit__(self, int length): | ||||
|         self.mem = Pool() | ||||
|         self._stack = <int*>self.mem.alloc(sizeof(int), length) | ||||
|         self._buffer = <int*>self.mem.alloc(sizeof(int), length) | ||||
|         self._sent = <TokenC*>self.mem.alloc(sizeof(TokenC*), length) | ||||
|         self.length = 0 | ||||
|         for i in range(self.length): | ||||
|     def __init__(self, int length): | ||||
|         cdef Pool mem = Pool() | ||||
|         self._buffer = <int*>mem.alloc(length, sizeof(int)) | ||||
|         self._stack = <int*>mem.alloc(length, sizeof(int)) | ||||
|         self._sent = <TokenC*>mem.alloc(length, sizeof(TokenC)) | ||||
|         self.mem = mem | ||||
|         self.length = length | ||||
|         self._s_i = 0 | ||||
|         self._b_i = 0 | ||||
|         cdef int i | ||||
|         for i in range(length): | ||||
|             self._buffer[i] = i | ||||
|         self._empty_token.lex = &EMPTY_LEXEME | ||||
| 
 | ||||
|     cdef int from_struct(self, const State* state) except -1: | ||||
|         # Load this object's stack position, buffer position, tokens and | ||||
|         # stack contents from an old-style State struct. | ||||
|         # NOTE(review): copies self.length tokens, so this assumes the object | ||||
|         # was constructed with state.sent_len (as the visible callers do) and | ||||
|         # that state.stack_len <= self.length -- confirm for other callers. | ||||
|         self._s_i = state.stack_len | ||||
|         self._b_i = state.i | ||||
|         memcpy(self._sent, state.sent, sizeof(TokenC) * self.length) | ||||
|         cdef int i | ||||
|         for i in range(state.stack_len): | ||||
|             # Slot _s_i - (i+1) is the one S(i) reads, so S(i) ends up equal | ||||
|             # to state.stack[-i]. Presumably state.stack points at the top | ||||
|             # of the struct's stack -- TODO confirm against _state. | ||||
|             self._stack[self._s_i - (i+1)] = state.stack[-i] | ||||
| 
 | ||||
|     cdef int S(self, int i) nogil: | ||||
|         if self._s_i - (i+1) < 0: | ||||
|         if i >= self._s_i: | ||||
|             return -1 | ||||
|         return self._stack[self._s_i - (i+1)] | ||||
| 
 | ||||
|  | @ -33,14 +42,71 @@ cdef class StateClass: | |||
|         return self._sent[i].head + i | ||||
| 
 | ||||
|     cdef int L(self, int i, int idx) nogil: | ||||
|         if 0 <= _popcount(self.safe_get(i).l_kids) <= idx: | ||||
|         if idx < 1: | ||||
|             return -1 | ||||
|         if i < 0 or i >= self.length: | ||||
|             return -1 | ||||
|         cdef const TokenC* target = &self._sent[i] | ||||
|         cdef const TokenC* ptr = self._sent | ||||
| 
 | ||||
|         while ptr < target: | ||||
|             # If this head is still to the right of us, we can skip to it | ||||
|             # No token that's between this token and this head could be our | ||||
|             # child. | ||||
|             if (ptr.head >= 1) and (ptr + ptr.head) < target: | ||||
|                 ptr += ptr.head | ||||
| 
 | ||||
|             elif ptr + ptr.head == target: | ||||
|                 idx -= 1 | ||||
|                 if idx == 0: | ||||
|                     return ptr - self._sent | ||||
|                 ptr += 1 | ||||
|             else: | ||||
|                 ptr += 1 | ||||
|         return -1 | ||||
|         return _nth_significant_bit(self.safe_get(i).l_kids, idx) | ||||
| 
 | ||||
|     cdef int R(self, int i, int idx) nogil: | ||||
|         if 0 <= _popcount(self.safe_get(i).r_kids) <= idx: | ||||
|         if idx < 1: | ||||
|             return -1 | ||||
|         return _nth_significant_bit(self.safe_get(i).r_kids, idx) | ||||
|         if i < 0 or i >= self.length: | ||||
|             return -1 | ||||
|         cdef const TokenC* ptr = self._sent + (self.length - 1) | ||||
|         cdef const TokenC* target = &self._sent[i] | ||||
|         while ptr > target: | ||||
|             # If this head is still to the right of us, we can skip to it | ||||
|             # No token that's between this token and this head could be our | ||||
|             # child. | ||||
|             if (ptr.head < 0) and ((ptr + ptr.head) > target): | ||||
|                 ptr += ptr.head | ||||
|             elif ptr + ptr.head == target: | ||||
|                 idx -= 1 | ||||
|                 if idx == 0: | ||||
|                     return ptr - self._sent | ||||
|                 ptr -= 1 | ||||
|             else: | ||||
|                 ptr -= 1 | ||||
|         return -1 | ||||
| 
 | ||||
|     cdef const TokenC* S_(self, int i) nogil: | ||||
|         # Token struct for the i-th stack position, via bounds-checked lookup. | ||||
|         cdef int word = self.S(i) | ||||
|         return self.safe_get(word) | ||||
| 
 | ||||
|     cdef const TokenC* B_(self, int i) nogil: | ||||
|         # Token struct for the i-th buffer position, via bounds-checked lookup. | ||||
|         cdef int word = self.B(i) | ||||
|         return self.safe_get(word) | ||||
| 
 | ||||
|     cdef const TokenC* H_(self, int i) nogil: | ||||
|         # Token struct for the head of token i. | ||||
|         # This used to return safe_get(self.B(i)), a copy-paste of B_ that | ||||
|         # handed back a buffer token instead of the head. | ||||
|         # Guard i here so an out-of-range index yields the empty token before | ||||
|         # H() is asked to index the sentence. | ||||
|         if i < 0 or i >= self.length: | ||||
|             return &self._empty_token | ||||
|         return self.safe_get(self.H(i)) | ||||
| 
 | ||||
|     cdef const TokenC* L_(self, int i, int idx) nogil: | ||||
|         # Token struct for the index that L(i, idx) returns, via | ||||
|         # bounds-checked lookup. | ||||
|         cdef int child = self.L(i, idx) | ||||
|         return self.safe_get(child) | ||||
| 
 | ||||
|     cdef const TokenC* R_(self, int i, int idx) nogil: | ||||
|         # Token struct for the index that R(i, idx) returns, via | ||||
|         # bounds-checked lookup. | ||||
|         cdef int child = self.R(i, idx) | ||||
|         return self.safe_get(child) | ||||
| 
 | ||||
|     cdef const TokenC* safe_get(self, int i) nogil: | ||||
|         # Bounds-checked token lookup: an index inside [0, length) gives the | ||||
|         # real token, anything else gives this object's empty token. | ||||
|         if 0 <= i < self.length: | ||||
|             return &self._sent[i] | ||||
|         return &self._empty_token | ||||
| 
 | ||||
|     cdef bint empty(self) nogil: | ||||
|         return self._s_i <= 0 | ||||
|  | @ -54,6 +120,12 @@ cdef class StateClass: | |||
|     cdef bint has_head(self, int i) nogil: | ||||
|         return self.safe_get(i).head != 0 | ||||
| 
 | ||||
|     cdef int n_L(self, int i) nogil: | ||||
|         # Population count of token i's l_kids field; an out-of-range i | ||||
|         # reads the empty token instead. | ||||
|         cdef const TokenC* token = self.safe_get(i) | ||||
|         return _popcount(token.l_kids) | ||||
| 
 | ||||
|     cdef int n_R(self, int i) nogil: | ||||
|         # Population count of token i's r_kids field; an out-of-range i | ||||
|         # reads the empty token instead. | ||||
|         cdef const TokenC* token = self.safe_get(i) | ||||
|         return _popcount(token.r_kids) | ||||
| 
 | ||||
|     cdef bint stack_is_connected(self) nogil: | ||||
|         return False | ||||
| 
 | ||||
|  |  | |||
|  | @ -51,10 +51,3 @@ cdef class TransitionSystem: | |||
| 
 | ||||
|     cdef Transition best_gold(self, const weight_t* scores, const State* state, | ||||
|                               GoldParse gold) except * | ||||
| 
 | ||||
| 
 | ||||
| #cdef class PyState: | ||||
| #    """Provide a Python class for testing purposes.""" | ||||
| #    cdef Pool mem | ||||
| #    cdef TransitionSystem system | ||||
| #    cdef State* _state | ||||
|  |  | |||
|  | @ -3,6 +3,8 @@ from ._state cimport State | |||
| from ..structs cimport TokenC | ||||
| from thinc.typedefs cimport weight_t | ||||
| 
 | ||||
| from .stateclass cimport StateClass | ||||
| 
 | ||||
| 
 | ||||
| cdef weight_t MIN_SCORE = -90000 | ||||
| 
 | ||||
|  | @ -55,6 +57,8 @@ cdef class TransitionSystem: | |||
| 
 | ||||
|     cdef Transition best_gold(self, const weight_t* scores, const State* s, | ||||
|                               GoldParse gold) except *: | ||||
|         cdef StateClass stcls = StateClass(s.sent_len) | ||||
|         stcls.from_struct(s) | ||||
|         cdef Transition best | ||||
|         cdef weight_t score = MIN_SCORE | ||||
|         cdef int i | ||||
|  | @ -65,39 +69,3 @@ cdef class TransitionSystem: | |||
|                 score = scores[i] | ||||
|         assert score > MIN_SCORE | ||||
|         return best | ||||
| 
 | ||||
| 
 | ||||
| #cdef class PyState: | ||||
| #    """Provide a Python class for testing purposes.""" | ||||
| #    def __init__(self, GoldParse gold): | ||||
| #        self.mem = Pool() | ||||
| #        self.system = EntityRecognition(labels) | ||||
| #        self._state = init_state(self.mem, tokens, gold.length) | ||||
| # | ||||
| #    def transition(self, name): | ||||
| #        cdef const Transition* trans = self._transition_by_name(name) | ||||
| #        trans.do(trans, self._state) | ||||
| # | ||||
| #    def is_valid(self, name): | ||||
| #        cdef const Transition* trans = self._transition_by_name(name) | ||||
| #        return _is_valid(trans.move, trans.label, self._state) | ||||
| # | ||||
| #    def is_gold(self, name): | ||||
| #        cdef const Transition* trans = self._transition_by_name(name) | ||||
| #        return _get_const(trans, self._state, self._gold) | ||||
| # | ||||
| #    property ent: | ||||
| #        def __get__(self): | ||||
| #            pass | ||||
| # | ||||
| #    property n_ents: | ||||
| #        def __get__(self): | ||||
| #            pass | ||||
| # | ||||
| #    property i: | ||||
| #        def __get__(self): | ||||
| #            pass | ||||
| # | ||||
| #    property open_entity: | ||||
| #        def __get__(self): | ||||
| #            return entity_is_open(self._s) | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user