spaCy/spacy/syntax/_state.pyx

106 lines
2.8 KiB
Cython
Raw Normal View History

2014-12-16 14:44:43 +03:00
# cython: profile=True
from libc.string cimport memmove
from cymem.cymem cimport Pool
from ..lexeme cimport EMPTY_LEXEME
2014-12-16 19:19:43 +03:00
cdef int add_dep(State *s, int head, int child, int label) except -1:
    # Attach token `child` to token `head` with dependency label `label`.
    #
    # The child token stores its head as a relative offset (head - child) and
    # the label in `dep_tag`. The head token additionally keeps two
    # bit-vectors, `l_kids` and `r_kids`: bit d is set when it has a child d
    # tokens away (children at a higher index go in `r_kids`, all others in
    # `l_kids`).
    cdef int offset = head - child
    cdef int distance = -offset if child > head else offset
    s.sent[child].head = offset
    s.sent[child].dep_tag = label
    # The bit-vectors are read back as uint32_t (see get_left / get_right), so
    # only distances 0..31 can be represented. Shifting a C int by 32 or more
    # is undefined behaviour and could set an unrelated bit, so such
    # long-range children are simply not recorded in the bit-vector. The
    # shifted value is unsigned so that bit 31 does not overflow a signed int.
    if distance < 32:
        if child > head:
            s.sent[head].r_kids |= (<uint32_t>1) << distance
        else:
            s.sent[head].l_kids |= (<uint32_t>1) << distance
2014-12-16 14:44:43 +03:00
2014-12-16 19:19:43 +03:00
cdef int pop_stack(State *s) except -1:
    # Discard the top stack entry: the stack pointer moves one slot down and
    # the recorded length shrinks by one. The stack must not be empty.
    assert s.stack_len >= 1
    s.stack -= 1
    s.stack_len -= 1
2014-12-20 21:36:29 +03:00
2014-12-16 14:44:43 +03:00
cdef int push_stack(State *s) except -1:
    # Push the current buffer index `s.i` onto the stack and advance the
    # buffer position by one. Requires that `s.i` is still inside the sentence.
    assert s.i < s.sent_len
    # Write into the slot just above the current top, then make it the top.
    s.stack[1] = s.i
    s.stack += 1
    s.stack_len += 1
    s.i += 1
2014-12-20 21:36:29 +03:00
cdef int children_in_buffer(const State *s, int head, int* gold) except -1:
    # Count the words still in the buffer (indices s.i .. s.sent_len - 1)
    # whose gold entry equals `head`. Note that gold[word] is compared with
    # `head` directly, i.e. it is treated as a head index here.
    cdef int n_children = 0
    cdef int word = s.i
    while word < s.sent_len:
        if gold[word] == head:
            n_children += 1
        word += 1
    return n_children
2014-12-20 21:36:29 +03:00
cdef int head_in_buffer(const State *s, const int child, int* gold) except -1:
    # Return 1 when the gold head of `child` lies at or beyond the current
    # buffer position `s.i`, otherwise 0.
    if gold[child] >= s.i:
        return 1
    return 0
2014-12-16 14:44:43 +03:00
2014-12-20 21:36:29 +03:00
cdef int children_in_stack(const State *s, const int head, int* gold) except -1:
    # Count the stack entries whose gold entry equals `head`. The stack is
    # walked from the top (s.stack[0]) downwards via negative indices.
    cdef int n_children = 0
    cdef int depth = 0
    while depth < s.stack_len:
        if gold[s.stack[-depth]] == head:
            n_children += 1
        depth += 1
    return n_children
2014-12-20 21:36:29 +03:00
cdef int head_in_stack(const State *s, const int child, int* gold) except -1:
    # Return 1 if some stack entry equals the gold entry of `child`, else 0.
    # The stack is scanned from the top (s.stack[0]) downwards.
    cdef int depth = 0
    while depth < s.stack_len:
        if s.stack[-depth] == gold[child]:
            return 1
        depth += 1
    return 0
2014-12-16 19:19:43 +03:00
cdef const TokenC* get_left(const State* s, const TokenC* head, const int idx) nogil:
    # Look up a left child of `head` through its `l_kids` bit-vector.
    #
    # Returns NULL when no left child is recorded. Otherwise the offset
    # selected by _nth_significant_bit(l_kids, idx) is subtracted from `head`;
    # if the resulting pointer falls before the start of the sentence,
    # `s.sent - 1` is returned instead.
    # NOTE(review): `s.sent - 1` presumably addresses a padding token placed
    # in front of the sentence -- confirm against the caller's allocation.
    cdef uint32_t left_bits = head.l_kids
    cdef int distance
    cdef const TokenC* candidate
    if left_bits == 0:
        return NULL
    distance = _nth_significant_bit(left_bits, idx)
    candidate = head - distance
    if candidate < s.sent:
        return s.sent - 1
    return candidate
2014-12-16 14:44:43 +03:00
2014-12-16 19:19:43 +03:00
cdef const TokenC* get_right(const State* s, const TokenC* head, const int idx) nogil:
    # Look up a right child of `head` through its `r_kids` bit-vector.
    #
    # Returns NULL when no right child is recorded. Otherwise the offset
    # selected by _nth_significant_bit(r_kids, idx) is added to `head`; if the
    # resulting pointer falls at or past the end of the sentence,
    # `s.sent - 1` is returned instead.
    # NOTE(review): `s.sent - 1` presumably addresses a padding token placed
    # in front of the sentence -- confirm against the caller's allocation.
    cdef uint32_t right_bits = head.r_kids
    cdef int distance
    cdef const TokenC* candidate
    if right_bits == 0:
        return NULL
    distance = _nth_significant_bit(right_bits, idx)
    candidate = head + distance
    if candidate >= (s.sent + s.sent_len):
        return s.sent - 1
    return candidate
2014-12-16 14:44:43 +03:00
2014-12-20 21:36:29 +03:00
# Compile-time constant used by init_state: the parse stack is allocated with
# 2 * PADDING slots beyond the sentence length, and the first PADDING slots
# are filled with -1.
DEF PADDING = 5
2014-12-16 14:44:43 +03:00
cdef State* init_state(Pool mem, TokenC* sent, const int sent_length) except NULL:
    # Allocate and initialise a parser State for `sent` from the pool `mem`.
    #
    # The stack array gets PADDING extra slots on top of the sentence length,
    # twice over; the first PADDING slots are filled with -1. The state starts
    # with an empty stack and the buffer position at word 0.
    cdef State* state = <State*>mem.alloc(1, sizeof(State))
    cdef int n_slots = PADDING + sent_length + PADDING
    state.stack = <int*>mem.alloc(n_slots, sizeof(int))
    for slot in range(PADDING):
        state.stack[slot] = -1
    # Rest the stack pointer on the last -1 slot, so the first push_stack
    # (which advances the pointer before writing) lands on the slot after it.
    state.stack += (PADDING - 1)
    assert state.stack[0] == -1
    state.sent = sent
    state.sent_len = sent_length
    state.i = 0
    state.stack_len = 0
    return state