diff --git a/spacy/syntax/_parse_features.pxd b/spacy/syntax/_parse_features.pxd index 191d41ef5..818e50cc1 100644 --- a/spacy/syntax/_parse_features.pxd +++ b/spacy/syntax/_parse_features.pxd @@ -34,6 +34,7 @@ cpdef enum: S2_shape S2_ne_iob S2_ne_type + S2ss S1w S1W @@ -47,6 +48,7 @@ cpdef enum: S1_shape S1_ne_iob S1_ne_type + S1ss S1rw S1rW @@ -60,6 +62,7 @@ cpdef enum: S1r_shape S1r_ne_iob S1r_ne_type + S1rss S0lw S0lW @@ -73,6 +76,7 @@ cpdef enum: S0l_shape S0l_ne_iob S0l_ne_type + S0lss S0l2w S0l2W @@ -86,6 +90,7 @@ cpdef enum: S0l2_shape S0l2_ne_iob S0l2_ne_type + S0l2ss S0w S0W @@ -99,6 +104,7 @@ cpdef enum: S0_shape S0_ne_iob S0_ne_type + S0ss S0r2w S0r2W @@ -112,6 +118,7 @@ cpdef enum: S0r2_shape S0r2_ne_iob S0r2_ne_type + S0r2ss S0rw S0rW @@ -125,6 +132,7 @@ cpdef enum: S0r_shape S0r_ne_iob S0r_ne_type + S0rss N0l2w N0l2W @@ -138,6 +146,7 @@ cpdef enum: N0l2_shape N0l2_ne_iob N0l2_ne_type + N0l2ss N0lw N0lW @@ -151,6 +160,7 @@ cpdef enum: N0l_shape N0l_ne_iob N0l_ne_type + N0lss N0w N0W @@ -164,6 +174,7 @@ cpdef enum: N0_shape N0_ne_iob N0_ne_type + N0ss N1w N1W @@ -177,6 +188,7 @@ cpdef enum: N1_shape N1_ne_iob N1_ne_type + N1ss N2w N2W @@ -190,6 +202,7 @@ cpdef enum: N2_shape N2_ne_iob N2_ne_type + N2ss P1w P1W @@ -203,6 +216,7 @@ cpdef enum: P1_shape P1_ne_iob P1_ne_type + P1ss P2w P2W @@ -216,6 +230,7 @@ cpdef enum: P2_shape P2_ne_iob P2_ne_type + P2ss E0w E0W @@ -229,6 +244,7 @@ cpdef enum: E0_shape E0_ne_iob E0_ne_type + E0ss E1w E1W @@ -242,6 +258,7 @@ cpdef enum: E1_shape E1_ne_iob E1_ne_type + E1ss # Misc features at the end dist diff --git a/spacy/syntax/_parse_features.pyx b/spacy/syntax/_parse_features.pyx index 3dc72f918..bbf0a9c4c 100644 --- a/spacy/syntax/_parse_features.pyx +++ b/spacy/syntax/_parse_features.pyx @@ -18,6 +18,8 @@ from .stateclass cimport StateClass from cymem.cymem cimport Pool +from ..cimport senses + cdef inline void fill_token(atom_t* context, const TokenC* token) nogil: if token is NULL: @@ -33,6 +35,7 @@ cdef inline void fill_token(atom_t* context, const TokenC* token) nogil: context[9] = 0 context[10] = 0 context[11] = 0 + context[12] = 0 else: context[0] = token.lex.orth context[1] = token.lemma @@ -58,6 +61,7 @@ cdef inline void fill_token(atom_t* context, const TokenC* token) nogil: context[9] = token.lex.shape context[10] = token.ent_iob context[11] = token.ent_type + context[12] = token.lex.senses & senses.POS_SENSES[token.pos] cdef int fill_context(atom_t* ctxt, StateClass st) nogil: # Take care to fill every element of context! @@ -250,6 +254,22 @@ unigrams = ( (N0lW, N0lp), (N0lc6, N0lp), (N0lL,), + + (S2ss,), + (S1ss,), + (S1rss,), + (S0lss,), + (S0l2ss,), + (S0ss,), + (S0r2ss,), + (S0rss,), + (N0lss,), + (N0l2ss,), + (N0ss,), + (N1ss,), + (N2ss,), + (P1ss,), + (P2ss,), ) @@ -276,6 +296,7 @@ s0_n0 = ( (S0p, N0lv, N0p), (S0c6, S0rL, S0r2L, N0p), (S0p, N0lL, N0l2L, N0p), + (S0ss, N0ss), ) @@ -296,6 +317,7 @@ s1_s0 = ( (S1L, S0L, S0p), (S1p, S1L, S0L, S0p), (S1p, S0p), + (S1ss, S0ss), ) @@ -309,7 +331,8 @@ s1_n0 = ( (S1c6, S1p, N0c6, N0p), (S1L, N0p), (S1p, S1rL, N0p), - (S1p, S1rp, N0p) + (S1p, S1rp, N0p), + (S1ss, N0ss), ) @@ -323,6 +346,7 @@ s0_n1 = ( (S0c6, S0p, N1c6, N1p), (S0L, N1p), (S0p, S0rL, N1p), + (S0ss, N1ss), ) @@ -334,6 +358,7 @@ n0_n1 = ( (N0c6, N0p, N1c6, N1p), (N0c, N1c), (N0p, N1c), + (N0ss, N1ss), ) tree_shape = ( @@ -361,6 +386,7 @@ trigrams = ( (N0W, N0p, N0lL, N0l2L), (N0p, N0lL, N0l2L), + (S1ss, S0ss, N0ss,), )