mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 18:26:30 +03:00
* Improve features for NER
This commit is contained in:
parent
7ecb52c0ed
commit
e181c051d5
|
@ -187,6 +187,28 @@ cpdef enum:
|
|||
P2_suffix
|
||||
P2_shape
|
||||
|
||||
E0w
|
||||
E0W
|
||||
E0p
|
||||
E0c
|
||||
E0c4
|
||||
E0c6
|
||||
E0L
|
||||
E0_prefix
|
||||
E0_suffix
|
||||
E0_shape
|
||||
|
||||
E1w
|
||||
E1W
|
||||
E1p
|
||||
E1c
|
||||
E1c4
|
||||
E1c6
|
||||
E1L
|
||||
E1_prefix
|
||||
E1_suffix
|
||||
E1_shape
|
||||
|
||||
# Misc features at the end
|
||||
dist
|
||||
N0lv
|
||||
|
|
|
@ -13,6 +13,7 @@ from ..tokens cimport TokenC
|
|||
from ._state cimport State
|
||||
from ._state cimport get_s2, get_s1, get_s0, get_n0, get_n1, get_n2
|
||||
from ._state cimport get_p2, get_p1
|
||||
from ._state cimport get_e0, get_e1
|
||||
from ._state cimport has_head, get_left, get_right
|
||||
from ._state cimport count_left_kids, count_right_kids
|
||||
|
||||
|
@ -69,6 +70,8 @@ cdef int fill_context(atom_t* context, State* state) except -1:
|
|||
fill_token(&context[P1w], get_p1(state))
|
||||
fill_token(&context[P2w], get_p2(state))
|
||||
|
||||
fill_token(&context[E0w], get_e0(state))
|
||||
fill_token(&context[E1w], get_e1(state))
|
||||
if state.stack_len >= 1:
|
||||
context[dist] = state.stack[0] - state.i
|
||||
else:
|
||||
|
@ -135,6 +138,32 @@ ner = (
|
|||
|
||||
(P1c, N0c),
|
||||
(N0c, N1c),
|
||||
|
||||
(E0w,),
|
||||
(E0c,),
|
||||
(E0p,),
|
||||
|
||||
(E0w, N0w),
|
||||
(E0c, N0w),
|
||||
(E0p, N0w),
|
||||
|
||||
(E0p, P1p, N0p),
|
||||
(E0c, P1c, N0c),
|
||||
|
||||
(E0w, P1c),
|
||||
(E0p, P1p),
|
||||
(E0c, P1c),
|
||||
(E0p, E1p),
|
||||
(E0c, P1p),
|
||||
|
||||
(E1w,),
|
||||
(E1c,),
|
||||
(E1p,),
|
||||
|
||||
(E0w, E1w),
|
||||
(E0w, E1p,),
|
||||
(E0p, E1w,),
|
||||
(E0p, E1w),
|
||||
)
|
||||
|
||||
|
||||
|
|
|
@ -54,6 +54,19 @@ cdef inline TokenC* get_p2(const State* s) nogil:
|
|||
return &s.sent[s.i-2]
|
||||
|
||||
|
||||
cdef inline TokenC* get_e0(const State* s) nogil:
    # Returns a pointer to the token at index s.ent.start in s.sent,
    # but only while s.ent.end is 0; for any other value of s.ent.end
    # the result is NULL.
    # NOTE(review): presumably ent.end == 0 marks an entity that is still
    # being built -- confirm against the State/Entity definitions.
    if s.ent.end == 0:
        return &s.sent[s.ent.start]
    return NULL
|
||||
|
||||
|
||||
cdef inline TokenC* get_e1(const State* s) nogil:
    # Returns a pointer to the token at index s.ent.start + 1 in s.sent.
    # That pointer is only handed out when s.ent.end is 0 and s.ent.start
    # is strictly less than s.i + 1; in every other case the result is NULL.
    # NOTE(review): there is no explicit check of s.ent.start + 1 against
    # the sentence length here -- verify that callers/State invariants keep
    # the index in range.
    if s.ent.end == 0 and s.ent.start < (s.i + 1):
        return &s.sent[s.ent.start + 1]
    return NULL
|
||||
|
||||
|
||||
cdef inline TokenC* get_n2(const State* s) nogil:
|
||||
if (s.i + 2) >= s.sent_len:
|
||||
|
|
Loading…
Reference in New Issue
Block a user