# spaCy/spacy/syntax/parser.pyx

# cython: profile=True
# cython: experimental_cpp_class_def=True
"""
MALT-style dependency parser
"""
from __future__ import unicode_literals
cimport cython

from cpython.ref cimport PyObject, Py_INCREF, Py_XDECREF

from libc.stdint cimport uint32_t, uint64_t
from libc.string cimport memset, memcpy
import random
import os.path
from os import path
import shutil
import json

from cymem.cymem cimport Pool, Address
from murmurhash.mrmr cimport hash64
from thinc.typedefs cimport weight_t, class_t, feat_t, atom_t, hash_t


from util import Config

from thinc.features cimport Extractor
from thinc.features cimport Feature
from thinc.features cimport count_feats

from thinc.learner cimport LinearModel

from thinc.search cimport Beam
from thinc.search cimport MaxViolation

from ..tokens cimport Tokens, TokenC
from ..strings cimport StringStore

from .arc_eager cimport TransitionSystem, Transition
from .transition_system import OracleError

from ..gold cimport GoldParse

from . import _parse_features
from ._parse_features cimport CONTEXT_SIZE
from ._parse_features cimport fill_context
from .stateclass cimport StateClass


# Module-wide debug switch; change it through set_debug().
DEBUG = False


def set_debug(val):
    """Store ``val`` in the module-level ``DEBUG`` flag.

    val: the new value for ``DEBUG``; it is stored as given, without
        conversion.
    """
    globals()['DEBUG'] = val


def get_templates(name):
    """Return the feature templates registered under ``name``.

    name: 'ner' selects ``_parse_features.ner``; 'debug' selects
        ``_parse_features.unigrams`` only; any other value selects the
        concatenation of the unigram, pairwise (s0_n0, s1_n0, s0_n1, n0_n1),
        tree_shape and trigram template groups, in that order.
    """
    features = _parse_features
    if name == 'ner':
        return features.ner
    if name == 'debug':
        return features.unigrams
    # Concatenate left to right with ``+`` (never ``+=``) so that none of the
    # module-level template collections is modified in place.
    templates = features.unigrams
    for group in (features.s0_n0, features.s1_n0, features.s0_n1,
                  features.n0_n1, features.tree_shape, features.trigrams):
        templates = templates + group
    return templates


cdef class Parser:
    """Transition-based parser that scores moves with a linear model.

    The parser runs either greedily or with beam search. The mode is chosen
    from the ``beam_width`` entry of the loaded config: values below 1 select
    the greedy code path, anything else selects the beam code path. When the
    config has no ``beam_width`` entry, 1 is used everywhere.
    """
    def __init__(self, StringStore strings, model_dir, transition_system):
        """Load the config and the model stored in ``model_dir``.

        strings: StringStore handed to the transition system.
        model_dir: path of an existing directory. The 'config' entry is read
            from it with ``Config.read`` and the path is passed on to Model.
        transition_system: callable invoked as
            ``transition_system(strings, cfg.labels)`` to build ``self.moves``.

        Raises AssertionError if ``model_dir`` is not an existing directory.
        """
        # isdir() is already False for paths that do not exist.
        assert os.path.isdir(model_dir), model_dir
        self.cfg = Config.read(model_dir, 'config')
        self.moves = transition_system(strings, self.cfg.labels)
        templates = get_templates(self.cfg.features)
        self.model = Model(self.moves.n_moves, templates, model_dir)

    def __call__(self, Tokens tokens):
        """Parse ``tokens`` in place.

        Returns 0 without doing anything when ``tokens`` is empty, and None
        otherwise (the parse is written to ``tokens`` via ``set_parse``).
        """
        if tokens.length == 0:
            return 0
        if self.cfg.get('beam_width', 1) < 1:
            self._greedy_parse(tokens)
        else:
            self._beam_parse(tokens)

    def train(self, Tokens tokens, GoldParse gold):
        """Run one training update on ``tokens`` against ``gold``.

        ``gold`` is first passed to ``self.moves.preprocess_gold``. Returns
        the loss reported by the greedy or beam trainer, selected by the same
        ``beam_width`` rule as ``__call__``.
        """
        self.moves.preprocess_gold(gold)
        # Same lookup (and default) as __call__, so that a config without a
        # 'beam_width' entry trains in the mode that will be used for parsing
        # instead of failing on a missing attribute.
        if self.cfg.get('beam_width', 1) < 1:
            return self._greedy_train(tokens, gold)
        else:
            return self._beam_train(tokens, gold)

    cdef int _greedy_parse(self, Tokens tokens) except -1:
        """Parse by repeatedly applying the best-scoring valid transition."""
        cdef atom_t[CONTEXT_SIZE] context
        cdef Transition guess
        cdef StateClass stcls = StateClass.init(tokens.data, tokens.length)
        self.moves.initialize_state(stcls)
        while not stcls.is_final():
            fill_context(context, stcls)
            scores = self.model.score(context)
            guess = self.moves.best_valid(scores, stcls)
            guess.do(stcls, guess.label)
        self.moves.finalize_state(stcls)
        tokens.set_parse(stcls._sent)

    cdef int _beam_parse(self, Tokens tokens) except -1:
        """Parse with beam search and keep the state at beam position 0."""
        cdef StateClass state
        cdef Beam beam = Beam(self.moves.n_moves,
                              self.cfg.get('beam_width', 1))
        beam.initialize(_init_state, tokens.length, tokens.data)
        try:
            beam.check_done(_check_final_state, NULL)
            while not beam.is_done:
                # The trailing argument is ignored by _advance_beam.
                self._advance_beam(beam, None, False, None)
            state = <StateClass>beam.at(0)
            self.moves.finalize_state(state)
            tokens.set_parse(state._sent)
        finally:
            # _init_state took a reference on every beam state; release them
            # even when the search or finalization raised.
            _cleanup(beam)

    def _greedy_train(self, Tokens tokens, GoldParse gold):
        """Follow the model's own predictions, updating after every move.

        At each step the model is updated with the predicted class, the class
        returned by ``self.moves.best_gold`` and the cost of the prediction;
        the predicted transition is then applied. Returns the sum of the
        per-step costs.
        """
        cdef StateClass stcls = StateClass.init(tokens.data, tokens.length)
        self.moves.initialize_state(stcls)

        cdef int cost
        cdef const weight_t* scores
        cdef Transition guess
        cdef Transition best
        cdef atom_t[CONTEXT_SIZE] context
        loss = 0
        while not stcls.is_final():
            fill_context(context, stcls)
            scores = self.model.score(context)
            guess = self.moves.best_valid(scores, stcls)
            best = self.moves.best_gold(scores, stcls, gold)
            cost = guess.get_cost(stcls, &gold.c, guess.label)
            self.model.update(context, guess.clas, best.clas, cost)
            guess.do(stcls, guess.label)
            loss += cost
        return loss

    def _beam_train(self, Tokens tokens, GoldParse gold_parse):
        """Advance a prediction beam and a gold-following beam in lockstep.

        After each step ``MaxViolation.check`` is called on both beams. When
        the prediction beam ends with a loss of at least 1, features are
        counted with +1 along ``violn.g_hist`` and -1 along ``violn.p_hist``
        and the counts are passed to the underlying model; otherwise the
        model is updated with empty counts. Returns ``pred.loss``.
        """
        beam_width = self.cfg.get('beam_width', 1)
        cdef Beam pred = Beam(self.moves.n_moves, beam_width)
        cdef Beam gold = Beam(self.moves.n_moves, beam_width)
        cdef MaxViolation violn = MaxViolation()
        pred.initialize(_init_state, tokens.length, tokens.data)
        gold.initialize(_init_state, tokens.length, tokens.data)
        try:
            pred.check_done(_check_final_state, NULL)
            gold.check_done(_check_final_state, NULL)
            while not pred.is_done and not gold.is_done:
                # The trailing argument is ignored by _advance_beam.
                self._advance_beam(pred, gold_parse, False, None)
                self._advance_beam(gold, gold_parse, True, None)
                violn.check(pred, gold)
            if pred.loss >= 1:
                counts = {clas: {} for clas in range(self.model.n_classes)}
                self._count_feats(counts, tokens, violn.g_hist, 1)
                self._count_feats(counts, tokens, violn.p_hist, -1)
            else:
                counts = {}
            self.model._model.update(counts)
        finally:
            # Release the references held by both beams on every exit path,
            # including exceptions raised while searching or updating.
            _cleanup(pred)
            _cleanup(gold)
        return pred.loss

    def _advance_beam(self, Beam beam, GoldParse gold, bint follow_gold, words):
        """Score every unfinished state in ``beam`` and advance it one step.

        beam: the beam to advance.
        gold: when not None, transition costs are filled in for every
            unfinished state.
        follow_gold: when true (and ``gold`` is given), every move with a
            non-zero cost is marked invalid.
        words: unused; kept so the signature stays the same.
        """
        cdef atom_t[CONTEXT_SIZE] context
        cdef int i, j
        for i in range(beam.size):
            stcls = <StateClass>beam.at(i)
            if not stcls.is_final():
                fill_context(context, stcls)
                self.model.set_scores(beam.scores[i], context)
                self.moves.set_valid(beam.is_valid[i], stcls)
        if gold is not None:
            for i in range(beam.size):
                stcls = <StateClass>beam.at(i)
                if not stcls.is_final():
                    self.moves.set_costs(beam.costs[i], stcls, gold)
                    if follow_gold:
                        for j in range(self.moves.n_moves):
                            # Multiplying by the comparison zeroes the flag
                            # of every move whose cost is not 0.
                            beam.is_valid[i][j] *= beam.costs[i][j] == 0
        beam.advance(_transition_state, _hash_state, <void*>self.moves.c)
        beam.check_done(_check_final_state, NULL)

    def _count_feats(self, dict counts, Tokens tokens, list hist, int inc):
        """Replay ``hist`` from a fresh state, counting features per class.

        counts: dict holding one feature-count dict per class id; updated in
            place through ``count_feats``.
        tokens: the sentence the history was produced for.
        hist: list of class ids, applied in order.
        inc: increment handed to ``count_feats`` for every extracted feature.
        """
        cdef atom_t[CONTEXT_SIZE] context
        cdef StateClass stcls = StateClass.init(tokens.data, tokens.length)
        self.moves.initialize_state(stcls)

        cdef class_t clas
        cdef int n_feats
        for clas in hist:
            fill_context(context, stcls)
            feats = self.model._extractor.get_feats(context, &n_feats)
            count_feats(counts[clas], feats, n_feats, inc)
            self.moves.c[clas].do(stcls, self.moves.c[clas].label)


# These are passed as callbacks to thinc.search.Beam

cdef int _transition_state(void* _dest, void* _src, class_t clas, void* _moves) except -1:
    """Beam callback: copy ``_src`` into ``_dest``, then apply move ``clas``.

    ``_dest`` and ``_src`` are cast to StateClass; ``_moves`` is cast to an
    array of Transition that is indexed by ``clas``.
    """
    cdef StateClass child = <StateClass>_dest
    cdef StateClass parent = <StateClass>_src
    cdef const Transition* table = <const Transition*>_moves
    child.clone(parent)
    table[clas].do(child, table[clas].label)
    return 0


cdef void* _init_state(Pool mem, int length, void* tokens) except NULL:
    """Beam callback: build the initial StateClass for a sentence.

    ``tokens`` is cast to ``const TokenC*`` and used together with ``length``
    to create the state, on which ``push()`` is called once. ``mem`` is not
    used here.
    """
    cdef const TokenC* sent = <const TokenC*>tokens
    cdef StateClass state = StateClass.init(sent, length)
    state.push()
    # The caller only receives a raw pointer, so take an extra reference
    # before returning; _cleanup() is what drops it again.
    Py_INCREF(state)
    return <void*>state


cdef int _check_final_state(void* _state, void* extra_args) except -1:
    """Beam callback: report whether the StateClass behind ``_state`` is final.

    ``extra_args`` is ignored.
    """
    cdef StateClass state = <StateClass>_state
    return state.is_final()


def _cleanup(Beam beam):
    """Drop one reference from every state and parent slot of ``beam``.

    This balances the ``Py_INCREF`` done in ``_init_state``. ``Py_XDECREF``
    is used, so slots holding NULL are skipped.
    """
    cdef int slot
    for slot in range(beam.width):
        Py_XDECREF(<PyObject*>beam._states[slot].content)
        Py_XDECREF(<PyObject*>beam._parents[slot].content)

cdef hash_t _hash_state(void* _state, void* _) except 0:
    """Beam callback: use the address of the state object as its hash.

    The second argument is ignored. Because the value is the pointer itself,
    two distinct state objects never share a hash, whatever their contents.
    """
    cdef hash_t address = <hash_t>_state
    return address
    
    #state = <const State*>_state
    #cdef atom_t[10] rep

    #rep[0] = state.stack[0] if state.stack_len >= 1 else 0
    #rep[1] = state.stack[-1] if state.stack_len >= 2 else 0
    #rep[2] = state.stack[-2] if state.stack_len >= 3 else 0
    #rep[3] = state.i
    #rep[4] = state.sent[state.stack[0]].l_kids if state.stack_len >= 1 else 0
    #rep[5] = state.sent[state.stack[0]].r_kids if state.stack_len >= 1 else 0
    #rep[6] = state.sent[state.stack[0]].dep if state.stack_len >= 1 else 0
    #rep[7] = state.sent[state.stack[-1]].dep if state.stack_len >= 2 else 0
    #if get_left(state, get_n0(state), 1) != NULL:
    #    rep[8] = get_left(state, get_n0(state), 1).dep 
    #else:
    #    rep[8] = 0
    #rep[9] = state.sent[state.i].l_kids
    #return hash64(rep, sizeof(atom_t) * 10, 0)
* Refactor _advance_beam function 2015-06-02 19:38:41 +03:00			`# cython: profile=True`
* Prepare to switch to using state class, instead of state struct 2015-06-09 22:20:14 +03:00			`# cython: experimental_cpp_class_def=True`
* Work on greedy parser 2014-12-16 14:44:43 +03:00			`"""`
			`MALT-style dependency parser`
			`"""`
			`from __future__ import unicode_literals`
			`cimport cython`
* Greedy parsing working with new StateClass. Beam parsing broken 2015-06-10 05:20:23 +03:00
			`from cpython.ref cimport PyObject, Py_INCREF, Py_XDECREF`

* Work on implementing a trainable cache for the parser. So far, doesn't improve efficiency 2014-12-19 01:30:50 +03:00			`from libc.stdint cimport uint32_t, uint64_t`
* Refactor _advance_beam function 2015-06-02 19:38:41 +03:00			`from libc.string cimport memset, memcpy`
* Work on greedy parser 2014-12-16 14:44:43 +03:00			`import random`
			`import os.path`
* Refactor _ml.Model, and finish implementing HastyModel so far not worthwhile. 2014-12-31 11:40:59 +03:00			`from os import path`
* Work on greedy parser 2014-12-16 14:44:43 +03:00			`import shutil`
			`import json`

			`from cymem.cymem cimport Pool, Address`
* Work on implementing a trainable cache for the parser. So far, doesn't improve efficiency 2014-12-19 01:30:50 +03:00			`from murmurhash.mrmr cimport hash64`
* Don't automatically push words when stack is empty, as it messes up beam parsing. Add hash method to beam state. 2015-06-08 15:49:04 +03:00			`from thinc.typedefs cimport weight_t, class_t, feat_t, atom_t, hash_t`
* Work on greedy parser 2014-12-16 14:44:43 +03:00

			`from util import Config`

			`from thinc.features cimport Extractor`
			`from thinc.features cimport Feature`
			`from thinc.features cimport count_feats`

			`from thinc.learner cimport LinearModel`

* Add beam search capabilities to Parser. Rename GreedyParser to Parser. 2015-06-02 01:28:02 +03:00			`from thinc.search cimport Beam`
			`from thinc.search cimport MaxViolation`

* Work on greedy parser 2014-12-16 14:44:43 +03:00			`from ..tokens cimport Tokens, TokenC`
* Use StringStore to encode label names, instead of label_ids 2015-03-14 18:06:35 +03:00			`from ..strings cimport StringStore`
* Work on greedy parser 2014-12-16 14:44:43 +03:00
* Add non-monotonic parsing with cost-sensitive update. 92.26 on Y&M set 2014-12-18 03:33:25 +03:00			`from .arc_eager cimport TransitionSystem, Transition`
* Refactored transition system code now compiling. Still need to hook up label oracle, and test 2015-02-22 08:32:07 +03:00			`from .transition_system import OracleError`
* Work on greedy parser 2014-12-16 14:44:43 +03:00
* Move spacy.syntax.conll to spacy.gold 2015-05-24 22:35:02 +03:00			`from ..gold cimport GoldParse`
* Work on greedy parser 2014-12-16 14:44:43 +03:00
			`from . import _parse_features`
* Prepare to switch to using state class, instead of state struct 2015-06-09 22:20:14 +03:00			`from ._parse_features cimport CONTEXT_SIZE`
* Move StateClass into the interface for is_valid 2015-06-10 00:23:28 +03:00			`from ._parse_features cimport fill_context`
			`from .stateclass cimport StateClass`
* Work on greedy parser 2014-12-16 14:44:43 +03:00

Remove trailing whitespace 2015-04-19 11:31:31 +03:00			`DEBUG = False`
* Work on greedy parser 2014-12-16 14:44:43 +03:00			`def set_debug(val):`
			`global DEBUG`
			`DEBUG = val`


			`def get_templates(name):`
* Work on greedy parser 2014-12-17 13:09:29 +03:00			`pf = _parse_features`
* Clean up unused feature templates 2015-03-24 07:08:35 +03:00			`if name == 'ner':`
* NER seems to be working, scoring 69 F. Need to add decision-history features --- currently only use current word, 2 words context. Need refactoring. 2015-03-10 20:00:23 +03:00			`return pf.ner`
* Add support for debug feature set. Just use unigrams for this. 2015-03-24 06:29:01 +03:00			`elif name == 'debug':`
			`return pf.unigrams`
* Work on parser. Up to 92 UAS on YM labels 2014-12-18 01:05:31 +03:00			`else:`
* Work on refactored parser, where TransitionSystem can be easily subclassed 2015-02-21 07:30:31 +03:00			`return (pf.unigrams + pf.s0_n0 + pf.s1_n0 + pf.s0_n1 + pf.n0_n1 + \`
			`pf.tree_shape + pf.trigrams)`
* Work on greedy parser 2014-12-16 14:44:43 +03:00

* Add beam search capabilities to Parser. Rename GreedyParser to Parser. 2015-06-02 01:28:02 +03:00			`cdef class Parser:`
* Use StringStore to encode label names, instead of label_ids 2015-03-14 18:06:35 +03:00			`def __init__(self, StringStore strings, model_dir, transition_system):`
* Work on greedy parser 2014-12-16 14:44:43 +03:00			`assert os.path.exists(model_dir) and os.path.isdir(model_dir)`
			`self.cfg = Config.read(model_dir, 'config')`
* Use StringStore to encode label names, instead of label_ids 2015-03-14 18:06:35 +03:00			`self.moves = transition_system(strings, self.cfg.labels)`
* Work on refactored parser, where TransitionSystem can be easily subclassed 2015-02-21 07:30:31 +03:00			`templates = get_templates(self.cfg.features)`
			`self.model = Model(self.moves.n_moves, templates, model_dir)`
* Work on greedy parser 2014-12-16 14:44:43 +03:00
* Work on word vectors, and other stuff 2015-01-17 08:21:17 +03:00			`def __call__(self, Tokens tokens):`
* Fix issue #19: Allow parsing/pos tagging of empty strings 2015-02-10 18:15:58 +03:00			`if tokens.length == 0:`
			`return 0`
* Don't automatically push words when stack is empty, as it messes up beam parsing. Add hash method to beam state. 2015-06-08 15:49:04 +03:00			`if self.cfg.get('beam_width', 1) < 1:`
* Revise greedy_parse/beam_parse ownership goof 2015-06-02 02:34:19 +03:00			`self._greedy_parse(tokens)`
* Add beam search capabilities to Parser. Rename GreedyParser to Parser. 2015-06-02 01:28:02 +03:00			`else:`
* Revise greedy_parse/beam_parse ownership goof 2015-06-02 02:34:19 +03:00			`self._beam_parse(tokens)`

			`def train(self, Tokens tokens, GoldParse gold):`
			`self.moves.preprocess_gold(gold)`
* Don't automatically push words when stack is empty, as it messes up beam parsing. Add hash method to beam state. 2015-06-08 15:49:04 +03:00			`if self.cfg.beam_width < 1:`
* Revise greedy_parse/beam_parse ownership goof 2015-06-02 02:34:19 +03:00			`return self._greedy_train(tokens, gold)`
			`else:`
			`return self._beam_train(tokens, gold)`
* Fix issue #19: Allow parsing/pos tagging of empty strings 2015-02-10 18:15:58 +03:00
* Revise greedy_parse/beam_parse ownership goof 2015-06-02 02:34:19 +03:00			`cdef int _greedy_parse(self, Tokens tokens) except -1:`
* Work on greedy parser 2014-12-16 14:44:43 +03:00			`cdef atom_t[CONTEXT_SIZE] context`
			`cdef int n_feats`
			`cdef Pool mem = Pool()`
* Remove State* from parser.pyx entirely, switching over to StateClass. Beam parsing still untested. 2015-06-10 03:03:38 +03:00			`cdef StateClass stcls = StateClass.init(tokens.data, tokens.length)`
			`self.moves.initialize_state(stcls)`
* Work on refactored parser, where TransitionSystem can be easily subclassed 2015-02-21 07:30:31 +03:00			`cdef Transition guess`
* Move StateClass into interface of transition functions 2015-06-10 02:35:28 +03:00			`words = [w.orth_ for w in tokens]`
			`while not stcls.is_final():`
			`#print stcls.print_state(words)`
* Remove version of fill_context that takes State* 2015-06-10 02:39:07 +03:00			`fill_context(context, stcls)`
* Add beam search capabilities to Parser. Rename GreedyParser to Parser. 2015-06-02 01:28:02 +03:00			`scores = self.model.score(context)`
* Move StateClass into the interface for is_valid 2015-06-10 00:23:28 +03:00			`guess = self.moves.best_valid(scores, stcls)`
* Move StateClass into interface of transition functions 2015-06-10 02:35:28 +03:00			`guess.do(stcls, guess.label)`
			`self.moves.finalize_state(stcls)`
			`tokens.set_parse(stcls._sent)`
* Add beam search capabilities to Parser. Rename GreedyParser to Parser. 2015-06-02 01:28:02 +03:00
* Revise greedy_parse/beam_parse ownership goof 2015-06-02 02:34:19 +03:00			`cdef int _beam_parse(self, Tokens tokens) except -1:`
* Fix bugs in new greedy/beam parser 2015-06-02 03:01:33 +03:00			`cdef Beam beam = Beam(self.moves.n_moves, self.cfg.beam_width)`
* Add beam search capabilities to Parser. Rename GreedyParser to Parser. 2015-06-02 01:28:02 +03:00			`beam.initialize(_init_state, tokens.length, tokens.data)`
* Bug fixes to beam parser. Search still broken on non-gold sentences 2015-06-07 20:12:59 +03:00			`beam.check_done(_check_final_state, NULL)`
* Fix beam search with new StateClass 2015-06-10 07:33:39 +03:00			`words = [w.orth_ for w in tokens]`
* Add beam search capabilities to Parser. Rename GreedyParser to Parser. 2015-06-02 01:28:02 +03:00			`while not beam.is_done:`
* Fix beam search with new StateClass 2015-06-10 07:33:39 +03:00			`self._advance_beam(beam, None, False, words)`
* Remove State* from parser.pyx entirely, switching over to StateClass. Beam parsing still untested. 2015-06-10 03:03:38 +03:00			`state = <StateClass>beam.at(0)`
* Greedy parsing working with new StateClass. Beam parsing broken 2015-06-10 05:20:23 +03:00			`self.moves.finalize_state(state)`
			`tokens.set_parse(state._sent)`
			`_cleanup(beam)`
* Add beam search capabilities to Parser. Rename GreedyParser to Parser. 2015-06-02 01:28:02 +03:00
			`def _greedy_train(self, Tokens tokens, GoldParse gold):`
* Work on greedy parser 2014-12-16 14:44:43 +03:00			`cdef Pool mem = Pool()`
* Remove State* from parser.pyx entirely, switching over to StateClass. Beam parsing still untested. 2015-06-10 03:03:38 +03:00			`cdef StateClass stcls = StateClass.init(tokens.data, tokens.length)`
			`self.moves.initialize_state(stcls)`
* Clean up GreedyParser.train function a bit 2015-03-24 07:11:37 +03:00
			`cdef int cost`
			`cdef const Feature* feats`
			`cdef const weight_t* scores`
			`cdef Transition guess`
			`cdef Transition best`
			`cdef atom_t[CONTEXT_SIZE] context`
* Move spacy.syntax.conll to spacy.gold 2015-05-24 22:35:02 +03:00			`loss = 0`
* Move StateClass into interface of transition functions 2015-06-10 02:35:28 +03:00			`words = [w.orth_ for w in tokens]`
			`while not stcls.is_final():`
* Remove version of fill_context that takes State* 2015-06-10 02:39:07 +03:00			`fill_context(context, stcls)`
* Add beam search capabilities to Parser. Rename GreedyParser to Parser. 2015-06-02 01:28:02 +03:00			`scores = self.model.score(context)`
* Move StateClass into the interface for is_valid 2015-06-10 00:23:28 +03:00			`guess = self.moves.best_valid(scores, stcls)`
* Cost functions now take StateClass argument, instead of State*. 2015-06-10 01:40:43 +03:00			`best = self.moves.best_gold(scores, stcls, gold)`
			`cost = guess.get_cost(stcls, &gold.c, guess.label)`
* Work on refactored parser, where TransitionSystem can be easily subclassed 2015-02-21 07:30:31 +03:00			`self.model.update(context, guess.clas, best.clas, cost)`
* Move StateClass into interface of transition functions 2015-06-10 02:35:28 +03:00			`guess.do(stcls, guess.label)`
* Move spacy.syntax.conll to spacy.gold 2015-05-24 22:35:02 +03:00			`loss += cost`
			`return loss`
* Add beam search capabilities to Parser. Rename GreedyParser to Parser. 2015-06-02 01:28:02 +03:00
			`def _beam_train(self, Tokens tokens, GoldParse gold_parse):`
* Fix bugs in new greedy/beam parser 2015-06-02 03:01:33 +03:00			`cdef Beam pred = Beam(self.moves.n_moves, self.cfg.beam_width)`
* Add beam search capabilities to Parser. Rename GreedyParser to Parser. 2015-06-02 01:28:02 +03:00			`pred.initialize(_init_state, tokens.length, tokens.data)`
* Bug fixes to beam parser. Search still broken on non-gold sentences 2015-06-07 20:12:59 +03:00			`pred.check_done(_check_final_state, NULL)`
* Fix bugs in new greedy/beam parser 2015-06-02 03:01:33 +03:00			`cdef Beam gold = Beam(self.moves.n_moves, self.cfg.beam_width)`
* Add beam search capabilities to Parser. Rename GreedyParser to Parser. 2015-06-02 01:28:02 +03:00			`gold.initialize(_init_state, tokens.length, tokens.data)`
* Bug fixes to beam parser. Search still broken on non-gold sentences 2015-06-07 20:12:59 +03:00			`gold.check_done(_check_final_state, NULL)`
* Add beam search capabilities to Parser. Rename GreedyParser to Parser. 2015-06-02 01:28:02 +03:00
			`violn = MaxViolation()`
* Fix beam search with new StateClass 2015-06-10 07:33:39 +03:00			`words = [w.orth_ for w in tokens]`
* Add beam search capabilities to Parser. Rename GreedyParser to Parser. 2015-06-02 01:28:02 +03:00			`while not pred.is_done and not gold.is_done:`
* Fix beam search with new StateClass 2015-06-10 07:33:39 +03:00			`self._advance_beam(pred, gold_parse, False, words)`
			`self._advance_beam(gold, gold_parse, True, words)`
* Add beam search capabilities to Parser. Rename GreedyParser to Parser. 2015-06-02 01:28:02 +03:00			`violn.check(pred, gold)`
* Refactor _advance_beam function 2015-06-02 19:38:41 +03:00			`if pred.loss >= 1:`
* Impove efficiency of dynamic oracle, making beam training faster 2015-06-04 22:15:14 +03:00			`counts = {clas: {} for clas in range(self.model.n_classes)}`
* Add beam search capabilities to Parser. Rename GreedyParser to Parser. 2015-06-02 01:28:02 +03:00			`self._count_feats(counts, tokens, violn.g_hist, 1)`
			`self._count_feats(counts, tokens, violn.p_hist, -1)`
* Impove efficiency of dynamic oracle, making beam training faster 2015-06-04 22:15:14 +03:00			`else:`
			`counts = {}`
* Add beam search capabilities to Parser. Rename GreedyParser to Parser. 2015-06-02 01:28:02 +03:00			`self.model._model.update(counts)`
* Greedy parsing working with new StateClass. Beam parsing broken 2015-06-10 05:20:23 +03:00			`_cleanup(pred)`
			`_cleanup(gold)`
* Refactor _advance_beam function 2015-06-02 19:38:41 +03:00			`return pred.loss`
* Add beam search capabilities to Parser. Rename GreedyParser to Parser. 2015-06-02 01:28:02 +03:00
* Fix beam search with new StateClass 2015-06-10 07:33:39 +03:00			`def _advance_beam(self, Beam beam, GoldParse gold, bint follow_gold, words):`
* Add beam search capabilities to Parser. Rename GreedyParser to Parser. 2015-06-02 01:28:02 +03:00			`cdef atom_t[CONTEXT_SIZE] context`
			`cdef int i, j, cost`
			`cdef bint is_valid`
			`cdef const Transition* move`
			`for i in range(beam.size):`
* Remove State* from parser.pyx entirely, switching over to StateClass. Beam parsing still untested. 2015-06-10 03:03:38 +03:00			`stcls = <StateClass>beam.at(i)`
			`if not stcls.is_final():`
* Remove version of fill_context that takes State* 2015-06-10 02:39:07 +03:00			`fill_context(context, stcls)`
* Bug fixes to beam parser. Search still broken on non-gold sentences 2015-06-07 20:12:59 +03:00			`self.model.set_scores(beam.scores[i], context)`
* Move StateClass into the interface for is_valid 2015-06-10 00:23:28 +03:00			`self.moves.set_valid(beam.is_valid[i], stcls)`
* Impove efficiency of dynamic oracle, making beam training faster 2015-06-04 22:15:14 +03:00			`if gold is not None:`
* Refactor _advance_beam function 2015-06-02 19:38:41 +03:00			`for i in range(beam.size):`
* Remove State* from parser.pyx entirely, switching over to StateClass. Beam parsing still untested. 2015-06-10 03:03:38 +03:00			`stcls = <StateClass>beam.at(i)`
* Fix beam search with new StateClass 2015-06-10 07:33:39 +03:00			`if not stcls.is_final():`
			`self.moves.set_costs(beam.costs[i], stcls, gold)`
			`if follow_gold:`
			`for j in range(self.moves.n_moves):`
			`beam.is_valid[i][j] *= beam.costs[i][j] == 0`
* Greedy parsing working with new StateClass. Beam parsing broken 2015-06-10 05:20:23 +03:00			`beam.advance(_transition_state, _hash_state, <void*>self.moves.c)`
* Add beam search capabilities to Parser. Rename GreedyParser to Parser. 2015-06-02 01:28:02 +03:00			`beam.check_done(_check_final_state, NULL)`

			`def _count_feats(self, dict counts, Tokens tokens, list hist, int inc):`
			`cdef atom_t[CONTEXT_SIZE] context`
			`cdef Pool mem = Pool()`
* Remove State* from parser.pyx entirely, switching over to StateClass. Beam parsing still untested. 2015-06-10 03:03:38 +03:00			`cdef StateClass stcls = StateClass.init(tokens.data, tokens.length)`
			`self.moves.initialize_state(stcls)`
* Add beam search capabilities to Parser. Rename GreedyParser to Parser. 2015-06-02 01:28:02 +03:00
			`cdef class_t clas`
			`cdef int n_feats`
			`for clas in hist:`
* Remove version of fill_context that takes State* 2015-06-10 02:39:07 +03:00			`fill_context(context, stcls)`
* Add beam search capabilities to Parser. Rename GreedyParser to Parser. 2015-06-02 01:28:02 +03:00			`feats = self.model._extractor.get_feats(context, &n_feats)`
* Impove efficiency of dynamic oracle, making beam training faster 2015-06-04 22:15:14 +03:00			`count_feats(counts[clas], feats, n_feats, inc)`
* Move StateClass into interface of transition functions 2015-06-10 02:35:28 +03:00			`self.moves.c[clas].do(stcls, self.moves.c[clas].label)`
* Add beam search capabilities to Parser. Rename GreedyParser to Parser. 2015-06-02 01:28:02 +03:00

			`# These are passed as callbacks to thinc.search.Beam`

			`cdef int _transition_state(void* _dest, void* _src, class_t clas, void* _moves) except -1:`
* Remove State* from parser.pyx entirely, switching over to StateClass. Beam parsing still untested. 2015-06-10 03:03:38 +03:00			`dest = <StateClass>_dest`
			`src = <StateClass>_src`
* Add beam search capabilities to Parser. Rename GreedyParser to Parser. 2015-06-02 01:28:02 +03:00			`moves = <const Transition*>_moves`
* Remove State* from parser.pyx entirely, switching over to StateClass. Beam parsing still untested. 2015-06-10 03:03:38 +03:00			`dest.clone(src)`
			`moves[clas].do(dest, moves[clas].label)`
* Add beam search capabilities to Parser. Rename GreedyParser to Parser. 2015-06-02 01:28:02 +03:00

			`cdef void* _init_state(Pool mem, int length, void* tokens) except NULL:`
* Remove State* from parser.pyx entirely, switching over to StateClass. Beam parsing still untested. 2015-06-10 03:03:38 +03:00			`cdef StateClass st = StateClass.init(<const TokenC*>tokens, length)`
* Fix beam search with new StateClass 2015-06-10 07:33:39 +03:00			`st.push()`
* Greedy parsing working with new StateClass. Beam parsing broken 2015-06-10 05:20:23 +03:00			`Py_INCREF(st)`
* Remove State* from parser.pyx entirely, switching over to StateClass. Beam parsing still untested. 2015-06-10 03:03:38 +03:00			`return <void*>st`
* Add beam search capabilities to Parser. Rename GreedyParser to Parser. 2015-06-02 01:28:02 +03:00

* Remove State* from parser.pyx entirely, switching over to StateClass. Beam parsing still untested. 2015-06-10 03:03:38 +03:00			`cdef int _check_final_state(void* _state, void* extra_args) except -1:`
			`return (<StateClass>_state).is_final()`
* Don't automatically push words when stack is empty, as it messes up beam parsing. Add hash method to beam state. 2015-06-08 15:49:04 +03:00

* Greedy parsing working with new StateClass. Beam parsing broken 2015-06-10 05:20:23 +03:00			`def _cleanup(Beam beam):`
			`for i in range(beam.width):`
			`Py_XDECREF(<PyObject*>beam._states[i].content)`
			`Py_XDECREF(<PyObject*>beam._parents[i].content)`

* Don't automatically push words when stack is empty, as it messes up beam parsing. Add hash method to beam state. 2015-06-08 15:49:04 +03:00			`cdef hash_t _hash_state(void* _state, void* _) except 0:`
* Greedy parsing working with new StateClass. Beam parsing broken 2015-06-10 05:20:23 +03:00			`return <hash_t>_state`

			`#state = <const State*>_state`
			`#cdef atom_t[10] rep`

			`#rep[0] = state.stack[0] if state.stack_len >= 1 else 0`
			`#rep[1] = state.stack[-1] if state.stack_len >= 2 else 0`
			`#rep[2] = state.stack[-2] if state.stack_len >= 3 else 0`
			`#rep[3] = state.i`
			`#rep[4] = state.sent[state.stack[0]].l_kids if state.stack_len >= 1 else 0`
			`#rep[5] = state.sent[state.stack[0]].r_kids if state.stack_len >= 1 else 0`
			`#rep[6] = state.sent[state.stack[0]].dep if state.stack_len >= 1 else 0`
			`#rep[7] = state.sent[state.stack[-1]].dep if state.stack_len >= 2 else 0`
			`#if get_left(state, get_n0(state), 1) != NULL:`
			`# rep[8] = get_left(state, get_n0(state), 1).dep`
			`#else:`
			`# rep[8] = 0`
			`#rep[9] = state.sent[state.i].l_kids`
			`#return hash64(rep, sizeof(atom_t) * 10, 0)`