* Add ctnt field to State, in preparation for constituency parsing

This commit is contained in:
Matthew Honnibal 2015-05-06 16:29:10 +02:00
parent ab67693393
commit d2ac8d8007
3 changed files with 15 additions and 2 deletions

View File

@ -48,6 +48,13 @@ cdef struct Entity:
int label
# Plain C struct describing one constituent as four integer fields.
# The parser State keeps a pointer to an array of these (its `ctnt` member).
# NOTE(review): the per-field meanings below are inferred from the field
# names only; nothing in this diff reads or writes them yet -- confirm.
cdef struct Constituent:
int head  # presumably the index of the constituent's head token -- TODO confirm
int start  # presumably the index where the span begins -- TODO confirm
int end  # presumably where the span ends; inclusive vs. exclusive not shown -- TODO confirm
int label  # presumably an integer ID for the constituent's category -- TODO confirm
cdef struct TokenC:
const LexemeC* lex
Morphology morph
@ -65,6 +72,9 @@ cdef struct TokenC:
uint32_t l_edge
uint32_t r_edge
int attach_order
int ctnt_label
int ent_iob
int ent_type

View File

@ -2,13 +2,14 @@ from libc.stdint cimport uint32_t
from cymem.cymem cimport Pool
from ..structs cimport TokenC, Entity
from ..structs cimport TokenC, Entity, Constituent
cdef struct State:
TokenC* sent
int* stack
Entity* ent
Constituent* ctnt
int i
int sent_len
int stack_len

View File

@ -2,7 +2,7 @@ from libc.string cimport memmove, memcpy
from cymem.cymem cimport Pool
from ..lexeme cimport EMPTY_LEXEME
from ..structs cimport TokenC, Entity
from ..structs cimport TokenC, Entity, Constituent
DEF PADDING = 5
@ -137,10 +137,12 @@ cdef int count_right_kids(const TokenC* head) nogil:
cdef State* new_state(Pool mem, const TokenC* sent, const int sent_len) except NULL:
cdef int padded_len = sent_len + PADDING + PADDING
cdef State* s = <State*>mem.alloc(1, sizeof(State))
s.ctnt = <Constituent*>mem.alloc(padded_len, sizeof(Constituent))
s.ent = <Entity*>mem.alloc(padded_len, sizeof(Entity))
s.stack = <int*>mem.alloc(padded_len, sizeof(int))
for i in range(PADDING):
s.stack[i] = -1
s.ctnt += (PADDING -1)
s.stack += (PADDING - 1)
s.ent += (PADDING - 1)
assert s.stack[0] == -1