mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-12 18:26:30 +03:00
* Improve features for NER
This commit is contained in:
parent
7ecb52c0ed
commit
e181c051d5
|
@ -187,6 +187,28 @@ cpdef enum:
|
||||||
P2_suffix
|
P2_suffix
|
||||||
P2_shape
|
P2_shape
|
||||||
|
|
||||||
|
E0w
|
||||||
|
E0W
|
||||||
|
E0p
|
||||||
|
E0c
|
||||||
|
E0c4
|
||||||
|
E0c6
|
||||||
|
E0L
|
||||||
|
E0_prefix
|
||||||
|
E0_suffix
|
||||||
|
E0_shape
|
||||||
|
|
||||||
|
E1w
|
||||||
|
E1W
|
||||||
|
E1p
|
||||||
|
E1c
|
||||||
|
E1c4
|
||||||
|
E1c6
|
||||||
|
E1L
|
||||||
|
E1_prefix
|
||||||
|
E1_suffix
|
||||||
|
E1_shape
|
||||||
|
|
||||||
# Misc features at the end
|
# Misc features at the end
|
||||||
dist
|
dist
|
||||||
N0lv
|
N0lv
|
||||||
|
|
|
@ -13,6 +13,7 @@ from ..tokens cimport TokenC
|
||||||
from ._state cimport State
|
from ._state cimport State
|
||||||
from ._state cimport get_s2, get_s1, get_s0, get_n0, get_n1, get_n2
|
from ._state cimport get_s2, get_s1, get_s0, get_n0, get_n1, get_n2
|
||||||
from ._state cimport get_p2, get_p1
|
from ._state cimport get_p2, get_p1
|
||||||
|
from ._state cimport get_e0, get_e1
|
||||||
from ._state cimport has_head, get_left, get_right
|
from ._state cimport has_head, get_left, get_right
|
||||||
from ._state cimport count_left_kids, count_right_kids
|
from ._state cimport count_left_kids, count_right_kids
|
||||||
|
|
||||||
|
@ -69,6 +70,8 @@ cdef int fill_context(atom_t* context, State* state) except -1:
|
||||||
fill_token(&context[P1w], get_p1(state))
|
fill_token(&context[P1w], get_p1(state))
|
||||||
fill_token(&context[P2w], get_p2(state))
|
fill_token(&context[P2w], get_p2(state))
|
||||||
|
|
||||||
|
fill_token(&context[E0w], get_e0(state))
|
||||||
|
fill_token(&context[E1w], get_e1(state))
|
||||||
if state.stack_len >= 1:
|
if state.stack_len >= 1:
|
||||||
context[dist] = state.stack[0] - state.i
|
context[dist] = state.stack[0] - state.i
|
||||||
else:
|
else:
|
||||||
|
@ -135,6 +138,32 @@ ner = (
|
||||||
|
|
||||||
(P1c, N0c),
|
(P1c, N0c),
|
||||||
(N0c, N1c),
|
(N0c, N1c),
|
||||||
|
|
||||||
|
(E0w,),
|
||||||
|
(E0c,),
|
||||||
|
(E0p,),
|
||||||
|
|
||||||
|
(E0w, N0w),
|
||||||
|
(E0c, N0w),
|
||||||
|
(E0p, N0w),
|
||||||
|
|
||||||
|
(E0p, P1p, N0p),
|
||||||
|
(E0c, P1c, N0c),
|
||||||
|
|
||||||
|
(E0w, P1c),
|
||||||
|
(E0p, P1p),
|
||||||
|
(E0c, P1c),
|
||||||
|
(E0p, E1p),
|
||||||
|
(E0c, P1p),
|
||||||
|
|
||||||
|
(E1w,),
|
||||||
|
(E1c,),
|
||||||
|
(E1p,),
|
||||||
|
|
||||||
|
(E0w, E1w),
|
||||||
|
(E0w, E1p,),
|
||||||
|
(E0p, E1w,),
|
||||||
|
(E0p, E1w),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -54,6 +54,19 @@ cdef inline TokenC* get_p2(const State* s) nogil:
|
||||||
return &s.sent[s.i-2]
|
return &s.sent[s.i-2]
|
||||||
|
|
||||||
|
|
||||||
|
cdef inline TokenC* get_e0(const State* s) nogil:
    # Return a pointer to the token at index s.ent.start in s.sent, or NULL
    # when s.ent.end is non-zero.
    # NOTE(review): presumably s.ent.end == 0 marks an entity that is still
    # open, so this yields the first token of the current entity -- confirm
    # against the State/entity definition.
    if s.ent.end == 0:
        return &s.sent[s.ent.start]
    return NULL
|
||||||
|
|
||||||
|
|
||||||
|
cdef inline TokenC* get_e1(const State* s) nogil:
    # Return a pointer to the token one past s.ent.start in s.sent, or NULL
    # when s.ent.end is non-zero or s.ent.start lies beyond s.i.
    # NOTE(review): presumably this is the second token of the entity that
    # is currently open -- confirm against the State/entity definition.
    # NOTE(review): when s.ent.start == s.i this returns &s.sent[s.i + 1];
    # unlike get_n2, no comparison against s.sent_len is made here -- confirm
    # that reading one token ahead is always safe for callers.
    if s.ent.end == 0 and s.ent.start < (s.i + 1):
        return &s.sent[s.ent.start + 1]
    return NULL
|
||||||
|
|
||||||
|
|
||||||
cdef inline TokenC* get_n2(const State* s) nogil:
|
cdef inline TokenC* get_n2(const State* s) nogil:
|
||||||
if (s.i + 2) >= s.sent_len:
|
if (s.i + 2) >= s.sent_len:
|
||||||
|
|
Loading…
Reference in New Issue
Block a user