diff --git a/spacy/syntax/_parse_features.pxd b/spacy/syntax/_parse_features.pxd index 64bf5515a..a51706af3 100644 --- a/spacy/syntax/_parse_features.pxd +++ b/spacy/syntax/_parse_features.pxd @@ -187,6 +187,28 @@ cpdef enum: P2_suffix P2_shape + E0w + E0W + E0p + E0c + E0c4 + E0c6 + E0L + E0_prefix + E0_suffix + E0_shape + + E1w + E1W + E1p + E1c + E1c4 + E1c6 + E1L + E1_prefix + E1_suffix + E1_shape + # Misc features at the end dist N0lv diff --git a/spacy/syntax/_parse_features.pyx b/spacy/syntax/_parse_features.pyx index a204526cc..656926797 100644 --- a/spacy/syntax/_parse_features.pyx +++ b/spacy/syntax/_parse_features.pyx @@ -13,6 +13,7 @@ from ..tokens cimport TokenC from ._state cimport State from ._state cimport get_s2, get_s1, get_s0, get_n0, get_n1, get_n2 from ._state cimport get_p2, get_p1 +from ._state cimport get_e0, get_e1 from ._state cimport has_head, get_left, get_right from ._state cimport count_left_kids, count_right_kids @@ -69,6 +70,8 @@ cdef int fill_context(atom_t* context, State* state) except -1: fill_token(&context[P1w], get_p1(state)) fill_token(&context[P2w], get_p2(state)) + fill_token(&context[E0w], get_e0(state)) + fill_token(&context[E1w], get_e1(state)) if state.stack_len >= 1: context[dist] = state.stack[0] - state.i else: @@ -135,6 +138,32 @@ ner = ( (P1c, N0c), (N0c, N1c), + + (E0w,), + (E0c,), + (E0p,), + + (E0w, N0w), + (E0c, N0w), + (E0p, N0w), + + (E0p, P1p, N0p), + (E0c, P1c, N0c), + + (E0w, P1c), + (E0p, P1p), + (E0c, P1c), + (E0p, E1p), + (E0c, P1p), + + (E1w,), + (E1c,), + (E1p,), + + (E0w, E1w), + (E0w, E1p,), + (E0p, E1w,), + (E0p, E1w), ) diff --git a/spacy/syntax/_state.pxd b/spacy/syntax/_state.pxd index 9936bc33f..5f6e1f303 100644 --- a/spacy/syntax/_state.pxd +++ b/spacy/syntax/_state.pxd @@ -54,6 +54,19 @@ cdef inline TokenC* get_p2(const State* s) nogil: return &s.sent[s.i-2] +cdef inline TokenC* get_e0(const State* s) nogil: + if s.ent.end != 0: + return NULL + else: + return &s.sent[s.ent.start] + + +cdef inline TokenC* get_e1(const State* s) nogil: + if s.ent.end != 0 or s.ent.start >= (s.i + 1): + return NULL + else: + return &s.sent[s.ent.start + 1] + cdef inline TokenC* get_n2(const State* s) nogil: if (s.i + 2) >= s.sent_len: