mirror of
https://github.com/explosion/spaCy.git
synced 2025-05-03 15:23:41 +03:00
* Add ctnt field to State, in preparation for constituency parsing
This commit is contained in:
parent
ab67693393
commit
d2ac8d8007
|
@ -48,6 +48,13 @@ cdef struct Entity:
|
||||||
int label
|
int label
|
||||||
|
|
||||||
|
|
||||||
|
cdef struct Constituent:
|
||||||
|
int head
|
||||||
|
int start
|
||||||
|
int end
|
||||||
|
int label
|
||||||
|
|
||||||
|
|
||||||
cdef struct TokenC:
|
cdef struct TokenC:
|
||||||
const LexemeC* lex
|
const LexemeC* lex
|
||||||
Morphology morph
|
Morphology morph
|
||||||
|
@ -65,6 +72,9 @@ cdef struct TokenC:
|
||||||
uint32_t l_edge
|
uint32_t l_edge
|
||||||
uint32_t r_edge
|
uint32_t r_edge
|
||||||
|
|
||||||
|
int attach_order
|
||||||
|
int ctnt_label
|
||||||
|
|
||||||
int ent_iob
|
int ent_iob
|
||||||
int ent_type
|
int ent_type
|
||||||
|
|
||||||
|
|
|
@ -2,13 +2,14 @@ from libc.stdint cimport uint32_t
|
||||||
|
|
||||||
from cymem.cymem cimport Pool
|
from cymem.cymem cimport Pool
|
||||||
|
|
||||||
from ..structs cimport TokenC, Entity
|
from ..structs cimport TokenC, Entity, Constituent
|
||||||
|
|
||||||
|
|
||||||
cdef struct State:
|
cdef struct State:
|
||||||
TokenC* sent
|
TokenC* sent
|
||||||
int* stack
|
int* stack
|
||||||
Entity* ent
|
Entity* ent
|
||||||
|
Constituent* ctnt
|
||||||
int i
|
int i
|
||||||
int sent_len
|
int sent_len
|
||||||
int stack_len
|
int stack_len
|
||||||
|
|
|
@ -2,7 +2,7 @@ from libc.string cimport memmove, memcpy
|
||||||
from cymem.cymem cimport Pool
|
from cymem.cymem cimport Pool
|
||||||
|
|
||||||
from ..lexeme cimport EMPTY_LEXEME
|
from ..lexeme cimport EMPTY_LEXEME
|
||||||
from ..structs cimport TokenC, Entity
|
from ..structs cimport TokenC, Entity, Constituent
|
||||||
|
|
||||||
|
|
||||||
DEF PADDING = 5
|
DEF PADDING = 5
|
||||||
|
@ -137,10 +137,12 @@ cdef int count_right_kids(const TokenC* head) nogil:
|
||||||
cdef State* new_state(Pool mem, const TokenC* sent, const int sent_len) except NULL:
|
cdef State* new_state(Pool mem, const TokenC* sent, const int sent_len) except NULL:
|
||||||
cdef int padded_len = sent_len + PADDING + PADDING
|
cdef int padded_len = sent_len + PADDING + PADDING
|
||||||
cdef State* s = <State*>mem.alloc(1, sizeof(State))
|
cdef State* s = <State*>mem.alloc(1, sizeof(State))
|
||||||
|
s.ctnt = <Constituent*>mem.alloc(padded_len, sizeof(Constituent))
|
||||||
s.ent = <Entity*>mem.alloc(padded_len, sizeof(Entity))
|
s.ent = <Entity*>mem.alloc(padded_len, sizeof(Entity))
|
||||||
s.stack = <int*>mem.alloc(padded_len, sizeof(int))
|
s.stack = <int*>mem.alloc(padded_len, sizeof(int))
|
||||||
for i in range(PADDING):
|
for i in range(PADDING):
|
||||||
s.stack[i] = -1
|
s.stack[i] = -1
|
||||||
|
s.ctnt += (PADDING -1)
|
||||||
s.stack += (PADDING - 1)
|
s.stack += (PADDING - 1)
|
||||||
s.ent += (PADDING - 1)
|
s.ent += (PADDING - 1)
|
||||||
assert s.stack[0] == -1
|
assert s.stack[0] == -1
|
||||||
|
|
Loading…
Reference in New Issue
Block a user