* Improve features for NER

This commit is contained in:
Matthew Honnibal 2015-03-10 21:26:13 -04:00
parent 7ecb52c0ed
commit e181c051d5
3 changed files with 64 additions and 0 deletions

View File

@ -187,6 +187,28 @@ cpdef enum:
P2_suffix P2_suffix
P2_shape P2_shape
E0w
E0W
E0p
E0c
E0c4
E0c6
E0L
E0_prefix
E0_suffix
E0_shape
E1w
E1W
E1p
E1c
E1c4
E1c6
E1L
E1_prefix
E1_suffix
E1_shape
# Misc features at the end # Misc features at the end
dist dist
N0lv N0lv

View File

@ -13,6 +13,7 @@ from ..tokens cimport TokenC
from ._state cimport State from ._state cimport State
from ._state cimport get_s2, get_s1, get_s0, get_n0, get_n1, get_n2 from ._state cimport get_s2, get_s1, get_s0, get_n0, get_n1, get_n2
from ._state cimport get_p2, get_p1 from ._state cimport get_p2, get_p1
from ._state cimport get_e0, get_e1
from ._state cimport has_head, get_left, get_right from ._state cimport has_head, get_left, get_right
from ._state cimport count_left_kids, count_right_kids from ._state cimport count_left_kids, count_right_kids
@ -69,6 +70,8 @@ cdef int fill_context(atom_t* context, State* state) except -1:
fill_token(&context[P1w], get_p1(state)) fill_token(&context[P1w], get_p1(state))
fill_token(&context[P2w], get_p2(state)) fill_token(&context[P2w], get_p2(state))
fill_token(&context[E0w], get_e0(state))
fill_token(&context[E1w], get_e1(state))
if state.stack_len >= 1: if state.stack_len >= 1:
context[dist] = state.stack[0] - state.i context[dist] = state.stack[0] - state.i
else: else:
@ -135,6 +138,32 @@ ner = (
(P1c, N0c), (P1c, N0c),
(N0c, N1c), (N0c, N1c),
(E0w,),
(E0c,),
(E0p,),
(E0w, N0w),
(E0c, N0w),
(E0p, N0w),
(E0p, P1p, N0p),
(E0c, P1c, N0c),
(E0w, P1c),
(E0p, P1p),
(E0c, P1c),
(E0p, E1p),
(E0c, P1p),
(E1w,),
(E1c,),
(E1p,),
(E0w, E1w),
(E0w, E1p,),
(E0p, E1w,),
(E0p, E1w),
) )

View File

@ -54,6 +54,19 @@ cdef inline TokenC* get_p2(const State* s) nogil:
return &s.sent[s.i-2] return &s.sent[s.i-2]
cdef inline TokenC* get_e0(const State* s) nogil:
    # Return a pointer to the token at index s.ent.start, or NULL.
    #
    # The token is only returned while s.ent.end is 0; any non-zero end
    # yields NULL. (Presumably end == 0 marks an entity that is still
    # open -- confirm against the code that sets s.ent.)
    if s.ent.end == 0:
        return &s.sent[s.ent.start]
    return NULL
cdef inline TokenC* get_e1(const State* s) nogil:
    # Return a pointer to the token at index s.ent.start + 1, or NULL.
    #
    # NULL is returned when s.ent.end is non-zero, or when s.ent.start is
    # not strictly below s.i + 1.
    # NOTE(review): the index start + 1 is not compared with s.sent_len
    # here, unlike the check in get_n2 -- confirm the sent buffer makes
    # this safe when start == s.i at the end of the sentence.
    if s.ent.end == 0 and s.ent.start < (s.i + 1):
        return &s.sent[s.ent.start + 1]
    return NULL
cdef inline TokenC* get_n2(const State* s) nogil: cdef inline TokenC* get_n2(const State* s) nogil:
if (s.i + 2) >= s.sent_len: if (s.i + 2) >= s.sent_len: