spaCy/spacy/syntax/_parse_features.pyx

420 lines
8.0 KiB
Cython
Raw Normal View History

2015-06-28 07:07:04 +03:00
# cython: profile=True
2014-12-16 14:44:43 +03:00
"""
Fill an array, context, with every _atomic_ value our features reference.
We then write the _actual features_ as tuples of the atoms. The machinery
that translates from the tuples to feature-extractors (which pick the values
out of "context") is in features/extractor.pyx
The atomic feature names are listed in a big enum, so that the feature tuples
can refer to them.
"""
2015-03-24 06:39:58 +03:00
from libc.string cimport memset
2014-12-16 14:44:43 +03:00
from itertools import combinations
from ..tokens cimport TokenC
from .stateclass cimport StateClass
from cymem.cymem cimport Pool
2014-12-16 14:44:43 +03:00
cdef inline void fill_token(atom_t* context, const TokenC* token) nogil:
    """Write the 12 atomic values for one token into the `context` slice.

    A NULL token (an out-of-range stack/buffer slot) zeroes all 12 slots,
    so padding positions get a distinct all-zero signature.
    """
    if token is NULL:
        memset(context, 0, 12 * sizeof(atom_t))
    else:
        context[0] = token.lex.orth
        context[1] = token.lemma
        context[2] = token.tag
        context[3] = token.lex.cluster
        # The Brown-cluster string was read in little-endian, so masking with
        # (2**n)-1 keeps the first n bits of the cluster path, i.e. a coarser
        # cluster ID. Worked example:
        #   s = "1110010101"
        #   first_4_bits = int(''.join(reversed(s)), 2) & 15
        #   "{0:b}".format(first_4_bits).ljust(4, '0')  ->  "1110"
        # We pick a mask whose bits are all 1 (15 is 1111, 63 is 111111) and
        # AND it in, keeping only the low bits that are set in the source.
        context[4] = token.lex.cluster & 15   # 4-bit cluster prefix
        context[5] = token.lex.cluster & 63   # 6-bit cluster prefix
        # Only expose the dependency label once a head has been assigned.
        context[6] = token.dep if token.head != 0 else 0
        context[7] = token.lex.prefix
        context[8] = token.lex.suffix
        context[9] = token.lex.shape
        context[10] = token.ent_iob
        context[11] = token.ent_type
2014-12-16 14:44:43 +03:00
2015-06-28 11:46:39 +03:00
cdef int fill_context(atom_t* ctxt, StateClass st) nogil:
    """Populate `ctxt` with every atomic value the feature templates read.

    Every slot is written unconditionally on every call. We could memset the
    whole array instead, but that makes it very easy to have broken features
    that make almost no impact on accuracy. If instead they're unset, the
    impact tends to be dramatic, so we get an obvious regression to fix.
    """
    # Stack tokens and their left/right children.
    fill_token(&ctxt[S2w], st.S_(2))
    fill_token(&ctxt[S1w], st.S_(1))
    fill_token(&ctxt[S1rw], st.R_(st.S(1), 1))
    fill_token(&ctxt[S0lw], st.L_(st.S(0), 1))
    fill_token(&ctxt[S0l2w], st.L_(st.S(0), 2))
    fill_token(&ctxt[S0w], st.S_(0))
    fill_token(&ctxt[S0r2w], st.R_(st.S(0), 2))
    fill_token(&ctxt[S0rw], st.R_(st.S(0), 1))
    # Buffer tokens and the left children of the first buffer word.
    fill_token(&ctxt[N0lw], st.L_(st.B(0), 1))
    fill_token(&ctxt[N0l2w], st.L_(st.B(0), 2))
    fill_token(&ctxt[N0w], st.B_(0))
    fill_token(&ctxt[N1w], st.B_(1))
    fill_token(&ctxt[N2w], st.B_(2))
    # The two tokens immediately preceding the buffer.
    fill_token(&ctxt[P1w], st.safe_get(st.B(0)-1))
    fill_token(&ctxt[P2w], st.safe_get(st.B(0)-2))
    # Entity tokens.
    fill_token(&ctxt[E0w], st.E_(0))
    fill_token(&ctxt[E1w], st.E_(1))
    # Distance feature, clipped via min_; zero when undefined.
    if st.stack_depth() >= 1 and not st.eol():
        ctxt[dist] = min_(st.B(0) - st.E(0), 5)
    else:
        ctxt[dist] = 0
    # Left/right valency counts, clipped via min_.
    ctxt[N0lv] = min_(st.n_L(st.B(0)), 5)
    ctxt[S0lv] = min_(st.n_L(st.S(0)), 5)
    ctxt[S0rv] = min_(st.n_R(st.S(0)), 5)
    ctxt[S1lv] = min_(st.n_L(st.S(1)), 5)
    ctxt[S1rv] = min_(st.n_R(st.S(1)), 5)
    # Head flags: 0 when the stack slot doesn't exist, else has_head()+1.
    ctxt[S0_has_head] = st.has_head(st.S(0)) + 1 if st.stack_depth() >= 1 else 0
    ctxt[S1_has_head] = st.has_head(st.S(1)) + 1 if st.stack_depth() >= 2 else 0
    ctxt[S2_has_head] = st.has_head(st.S(2)) + 1 if st.stack_depth() >= 3 else 0
2014-12-16 14:44:43 +03:00
2015-06-28 11:46:39 +03:00
cdef inline int min_(int a, int b) nogil:
    # Return the smaller of two ints (C-level, GIL-free).
    # BUG FIX: the previous comparison (`a if a > b else b`) returned the
    # *maximum*, so callers clipping counts/distances with min_(x, 5)
    # actually computed max(x, 5) instead of capping the value at 5.
    return a if a < b else b
# Feature templates for named-entity recognition: each tuple names the
# atomic context slots whose values are conjoined into one feature.
ner = (
    (N0W,),
    (P1W,),
    (N1W,),
    (P2W,),
    (N2W,),

    (P1W, N0W,),
    (N0W, N1W),

    # Orthographic templates: prefix, suffix, and word shape.
    (N0_prefix,),
    (N0_suffix,),
    (P1_shape,),
    (N0_shape,),
    (N1_shape,),
    (P1_shape, N0_shape,),
    (N0_shape, P1_shape,),
    (P1_shape, N0_shape, N1_shape),
    (N2_shape,),
    (P2_shape,),
    #(P2_norm, P1_norm, W_norm),
    #(P1_norm, W_norm, N1_norm),
    #(W_norm, N1_norm, N2_norm)
    # Part-of-speech tag templates.
    (P2p,),
    (P1p,),
    (N0p,),
    (N1p,),
    (N2p,),
    (P1p, N0p),
    (N0p, N1p),
    (P2p, P1p, N0p),
    (P1p, N0p, N1p),
    (N0p, N1p, N2p),
    # Brown-cluster templates.
    (P2c,),
    (P1c,),
    (N0c,),
    (N1c,),
    (N2c,),
    (P1c, N0c),
    (N0c, N1c),

    # Templates over the last entity (E0) and the one before it (E1).
    (E0W,),
    (E0c,),
    (E0p,),
    (E0W, N0W),
    (E0c, N0W),
    (E0p, N0W),
    (E0p, P1p, N0p),
    (E0c, P1c, N0c),
    # NOTE(review): lowercase E0w here, unlike the uppercase E0W used in the
    # rest of this table — confirm which atom was intended.
    (E0w, P1c),
    (E0p, P1p),
    (E0c, P1c),
    (E0p, E1p),
    (E0c, P1p),
    (E1W,),
    (E1c,),
    (E1p,),
    (E0W, E1W),
    (E0W, E1p,),
    (E0p, E1W,),
    # NOTE(review): (E0p, E1W) duplicates the line above, and several
    # templates below repeat earlier entries ((N0_shape,), (N0_prefix,),
    # (P1_ne_iob,), (N0w, P1_ne_iob, P1_ne_type), ...). Duplicated templates
    # are redundant — confirm whether distinct atoms were intended.
    (E0p, E1W),
    (P1_ne_iob,),
    (P1_ne_iob, P1_ne_type),
    (N0w, P1_ne_iob, P1_ne_type),
    (N0_shape,),
    (N1_shape,),
    (N2_shape,),
    (P1_shape,),
    (P2_shape,),
    (N0_prefix,),
    (N0_suffix,),
    (P1_ne_iob,),
    (P2_ne_iob,),
    (P1_ne_iob, P2_ne_iob),
    (P1_ne_iob, P1_ne_type),
    (P2_ne_iob, P2_ne_type),
    (N0w, P1_ne_iob, P1_ne_type),
    (N0w, N1w),
)
2015-02-04 22:16:14 +03:00
2014-12-27 10:45:16 +03:00
# Single-position parser templates: word+tag, 6-bit-cluster+tag, and
# dependency-label atoms for the stack (S*), buffer (N*) positions and
# their left/right children.
unigrams = (
    (S2W, S2p),
    (S2c6, S2p),

    (S1W, S1p),
    (S1c6, S1p),
    (S0W, S0p),
    (S0c6, S0p),

    (N0W, N0p),
    (N0p,),
    (N0c,),
    (N0c6, N0p),
    (N0L,),

    (N1W, N1p),
    (N1c6, N1p),

    (N2W, N2p),
    (N2c6, N2p),
    # Children of S0 and N0.
    (S0r2W, S0r2p),
    (S0r2c6, S0r2p),
    (S0r2L,),
    (S0rW, S0rp),
    (S0rc6, S0rp),
    (S0rL,),
    (S0l2W, S0l2p),
    (S0l2c6, S0l2p),
    (S0l2L,),
    (S0lW, S0lp),
    (S0lc6, S0lp),
    (S0lL,),
    (N0l2W, N0l2p),
    (N0l2c6, N0l2p),
    (N0l2L,),
    (N0lW, N0lp),
    (N0lc6, N0lp),
    (N0lL,),
)
# Pair templates over the stack top (S0) and the first buffer word (N0),
# including their child labels/valencies as backoff context.
s0_n0 = (
    (S0W, S0p, N0W, N0p),
    (S0c, S0p, N0c, N0p),
    (S0c6, S0p, N0c6, N0p),
    (S0c4, S0p, N0c4, N0p),
    (S0p, N0p),
    (S0W, N0p),
    (S0p, N0W),
    (S0W, N0c),
    (S0c, N0W),
    (S0p, N0c),
    (S0c, N0p),
    (S0W, S0rp, N0p),
    (S0p, S0rp, N0p),
    (S0p, N0lp, N0W),
    (S0p, N0lp, N0p),
    (S0L, N0p),
    (S0p, S0rL, N0p),
    (S0p, N0lL, N0p),
    (S0p, S0rv, N0p),
    (S0p, N0lv, N0p),
    (S0c6, S0rL, S0r2L, N0p),
    (S0p, N0lL, N0l2L, N0p),
)
# Pair templates over the top two stack items (S1, S0), with and without
# the S0_has_head flag conjoined.
s1_s0 = (
    (S1p, S0p),
    (S1p, S0p, S0_has_head),
    (S1W, S0p),
    (S1W, S0p, S0_has_head),
    (S1c, S0p),
    (S1c, S0p, S0_has_head),
    (S1p, S1rL, S0p),
    (S1p, S1rL, S0p, S0_has_head),
    (S1p, S0lL, S0p),
    (S1p, S0lL, S0p, S0_has_head),
    (S1p, S0lL, S0l2L, S0p),
    (S1p, S0lL, S0l2L, S0p, S0_has_head),
    (S1L, S0L, S0W),
    (S1L, S0L, S0p),
    (S1p, S1L, S0L, S0p),
    # NOTE(review): duplicates the first template in this table — redundant;
    # confirm whether a different template was intended.
    (S1p, S0p),
)
# Pair templates over the second stack item (S1) and the first buffer
# word (N0).
s1_n0 = (
    (S1p, N0p),
    (S1c, N0c),
    (S1c, N0p),
    (S1p, N0c),
    (S1W, S1p, N0p),
    (S1p, N0W, N0p),
    (S1c6, S1p, N0c6, N0p),
    (S1L, N0p),
    (S1p, S1rL, N0p),
    (S1p, S1rp, N0p)
)
# Pair templates over the stack top (S0) and the second buffer word (N1).
s0_n1 = (
    (S0p, N1p),
    (S0c, N1c),
    (S0c, N1p),
    (S0p, N1c),
    (S0W, S0p, N1p),
    (S0p, N1W, N1p),
    (S0c6, S0p, N1c6, N1p),
    (S0L, N1p),
    (S0p, S0rL, N1p),
)
2015-04-29 00:25:09 +03:00
2014-12-27 10:45:16 +03:00
# Pair templates over the first two buffer words (N0, N1).
n0_n1 = (
    (N0W, N0p, N1W, N1p),
    (N0W, N0p, N1p),
    (N0p, N1W, N1p),
    (N0c, N0p, N1c, N1p),
    (N0c6, N0p, N1c6, N1p),
    (N0c, N1c),
    (N0p, N1c),
)
# Templates describing the partial tree's shape: distance, head flags,
# and (clipped) valency counts.
tree_shape = (
    (dist,),
    (S0p, S0_has_head, S1_has_head, S2_has_head),
    (S0p, S0lv, S0rv),
    (N0p, N0lv),
)
# Three-position templates over tags, and label-chain templates over the
# outermost child labels of S0 and N0.
trigrams = (
    (N0p, N1p, N2p),
    (S0p, S0lp, S0l2p),
    (S0p, S0rp, S0r2p),
    (S0p, S1p, S2p),
    (S1p, S0p, N0p),
    (S0p, S0lp, N0p),
    (S0p, N0p, N0lp),
    (N0p, N0lp, N0l2p),

    (S0W, S0p, S0rL, S0r2L),
    (S0p, S0rL, S0r2L),
    (S0W, S0p, S0lL, S0l2L),
    (S0p, S0lL, S0l2L),
    (N0W, N0p, N0lL, N0l2L),
    (N0p, N0lL, N0l2L),
)
# Parallel listings of the word (orth), tag, and dependency-label atom
# index for each state position; the three tuples are position-aligned.
words = (
    S2w,
    S1w,
    S1rw,
    S0lw,
    S0l2w,
    S0w,
    S0r2w,
    S0rw,
    N0lw,
    N0l2w,
    N0w,
    N1w,
    N2w,
    P1w,
    P2w
)
# Tag atoms, aligned with `words` above.
tags = (
    S2p,
    S1p,
    S1rp,
    S0lp,
    S0l2p,
    S0p,
    S0r2p,
    S0rp,
    N0lp,
    N0l2p,
    N0p,
    N1p,
    N2p,
    P1p,
    P2p
)
# Dependency-label atoms, aligned with `words` above.
labels = (
    S2L,
    S1L,
    S1rL,
    S0lL,
    S0l2L,
    S0L,
    S0r2L,
    S0rL,
    N0lL,
    N0l2L,
    N0L,
    N1L,
    N2L,
    P1L,
    P2L
)