* Refactor away from the _ml module, to use thinc 4.0. Some work still needs to be done, e.g. adding __reduce__ to the models and more testing.
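The heart of the change is the per-example protocol that thinc 4.0's AveragedPerceptron exposes: the model now owns feature extraction and scoring, and call-sites drive each step explicitly (allocate an example, set_features, set_prediction, and update during training). As orientation for the diffs below, here is a minimal pure-Python sketch of that flow; ToyExample and ToyModel are hypothetical stand-ins for ExampleC and the new ParserModel/TaggerModel, not code from this commit:

    # Toy analogue of the thinc 4.0 per-example protocol used in this diff.
    # Illustrative only; the real types are Cython structs and classes.

    class ToyExample(object):
        def __init__(self, n_classes):
            self.features = []
            self.scores = [0.0] * n_classes
            self.is_valid = [True] * n_classes
            self.costs = [0] * n_classes
            self.guess = -1

    class ToyModel(object):
        def __init__(self, n_classes):
            self.n_classes = n_classes
            self.weights = {}  # feature -> per-class weight list

        def allocate(self):
            return ToyExample(self.n_classes)

        def set_features(self, eg, state):
            # The model owns feature extraction now (cf. fill_context +
            # ConjunctionExtracter in the Cython code below).
            eg.features = [('bias',)] + [('word', w) for w in state]

        def set_prediction(self, eg):
            # Score the extracted features, then argmax over valid classes.
            for f in eg.features:
                for clas, w in enumerate(self.weights.get(f, ())):
                    eg.scores[clas] += w
            valid = [c for c in range(self.n_classes) if eg.is_valid[c]]
            eg.guess = max(valid, key=lambda c: eg.scores[c])

        def update(self, eg):
            # Perceptron step: move weight from a costly guess toward the
            # cheapest class (averaging omitted for brevity).
            best = min(range(self.n_classes), key=lambda c: eg.costs[c])
            if eg.costs[eg.guess] > 0:
                for f in eg.features:
                    w = self.weights.setdefault(f, [0.0] * self.n_classes)
                    w[eg.guess] -= 1.0
                    w[best] += 1.0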

Matthew Honnibal 2015-11-07 03:24:30 +11:00
parent c339783bbe
commit 3c162dcac3
9 changed files with 155 additions and 142 deletions

View File

@@ -210,7 +210,6 @@ MOD_NAMES = ['spacy.parts_of_speech', 'spacy.strings',
              'spacy.lexeme', 'spacy.vocab', 'spacy.attrs',
              'spacy.morphology', 'spacy.tagger',
              'spacy.syntax.stateclass',
-             'spacy._ml',
              'spacy.tokenizer',
              'spacy.syntax.parser',
              'spacy.syntax.transition_system',

View File

@@ -1,3 +1,4 @@
+from __future__ import absolute_import
 from os import path
 from warnings import warn
 import io
@@ -13,7 +14,6 @@ from .syntax.parser import Parser
 from .tagger import Tagger
 from .matcher import Matcher
 from .serialize.packer import Packer
-from ._ml import Model
 from . import attrs
 from . import orth
 from .syntax.ner import BiluoPushDown
@@ -245,9 +245,12 @@ class Language(object):
     def end_training(self, data_dir=None):
         if data_dir is None:
             data_dir = self.data_dir
-        self.parser.model.end_training(path.join(data_dir, 'deps', 'model'))
-        self.entity.model.end_training(path.join(data_dir, 'ner', 'model'))
-        self.tagger.model.end_training(path.join(data_dir, 'pos', 'model'))
+        self.parser.model.end_training()
+        self.parser.model.dump(path.join(data_dir, 'deps', 'model'))
+        self.entity.model.end_training()
+        self.entity.model.dump(path.join(data_dir, 'ner', 'model'))
+        self.tagger.model.end_training()
+        self.tagger.model.dump(path.join(data_dir, 'pos', 'model'))
         strings_loc = path.join(data_dir, 'vocab', 'strings.json')
         with io.open(strings_loc, 'w', encoding='utf8') as file_:
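Note on the end_training() change above: finalizing the averaged weights no longer implies serialization; dump() is a separate step, so a model can be finalized without touching disk. A hedged sketch of the resulting call-site pattern (save_models and nlp are illustrative names, not part of this commit):

    from os import path

    def save_models(nlp, data_dir):
        # Finalize averaged weights (no I/O), then write each model out.
        for name, subdir in (('parser', 'deps'), ('entity', 'ner'), ('tagger', 'pos')):
            model = getattr(nlp, name).model
            model.end_training()
            model.dump(path.join(data_dir, subdir, 'model'))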

View File

@@ -78,7 +78,7 @@ cdef class StringStore:
     def __init__(self, strings=None):
         self.mem = Pool()
         self._map = PreshMap()
-        self._resize_at = 10
+        self._resize_at = 10000
        self.c = <Utf8Str*>self.mem.alloc(self._resize_at, sizeof(Utf8Str))
         self.size = 1
         if strings is not None:

View File

@@ -1,18 +1,17 @@
 from thinc.search cimport Beam
+from thinc.api cimport AveragedPerceptron
+from thinc.api cimport Example, ExampleC

-from .._ml cimport Model
+from .stateclass cimport StateClass
 from .arc_eager cimport TransitionSystem
 from ..tokens.doc cimport Doc
 from ..structs cimport TokenC
-from thinc.api cimport Example, ExampleC
-from .stateclass cimport StateClass

+cdef class ParserModel(AveragedPerceptron):
+    cdef void set_features(self, ExampleC* eg, StateClass stcls) except *
+
+
 cdef class Parser:
-    cdef readonly Model model
+    cdef readonly ParserModel model
     cdef readonly TransitionSystem moves

-    cdef void parse(self, StateClass stcls, ExampleC eg) nogil
-    cdef void predict(self, StateClass stcls, ExampleC* eg) nogil

View File

@@ -18,18 +18,15 @@ import sys
 from cymem.cymem cimport Pool, Address
 from murmurhash.mrmr cimport hash64
 from thinc.typedefs cimport weight_t, class_t, feat_t, atom_t, hash_t
+from thinc.features cimport ConjunctionExtracter

 from util import Config

-from thinc.api cimport Example, ExampleC
-
 from ..structs cimport TokenC
 from ..tokens.doc cimport Doc
 from ..strings cimport StringStore

 from .transition_system import OracleError
 from .transition_system cimport TransitionSystem, Transition
@@ -40,7 +37,6 @@ from ._parse_features cimport CONTEXT_SIZE
 from ._parse_features cimport fill_context
 from .stateclass cimport StateClass
-from thinc.learner cimport arg_max_if_true

 DEBUG = False
@@ -66,8 +62,18 @@ def ParserFactory(transition_system):
     return lambda strings, dir_: Parser(strings, dir_, transition_system)

+cdef class ParserModel(AveragedPerceptron):
+    def __init__(self, n_classes, templates):
+        AveragedPerceptron.__init__(self, n_classes,
+            ConjunctionExtracter(CONTEXT_SIZE, templates))
+
+    cdef void set_features(self, ExampleC* eg, StateClass stcls) except *:
+        fill_context(eg.atoms, stcls)
+        eg.nr_feat = self.extracter.set_features(eg.features, eg.atoms)
+
+
 cdef class Parser:
-    def __init__(self, StringStore strings, transition_system, model):
+    def __init__(self, StringStore strings, transition_system, ParserModel model):
         self.moves = transition_system
         self.model = model
@@ -80,54 +86,50 @@ cdef class Parser:
         cfg = Config.read(model_dir, 'config')
         moves = transition_system(strings, cfg.labels)
         templates = get_templates(cfg.features)
-        model = Model(moves.n_moves, templates, model_dir)
+        model = ParserModel(moves.n_moves, templates)
+        if path.exists(path.join(model_dir, 'model')):
+            model.load(path.join(model_dir, 'model'))
         return cls(strings, moves, model)

+    def __reduce__(self):
+        return (Parser, (self.moves.strings, self.moves, self.model), None, None)
+
     def __call__(self, Doc tokens):
         cdef StateClass stcls = StateClass.init(tokens.c, tokens.length)
         self.moves.initialize_state(stcls)
-        cdef Example eg = Example(self.model.n_classes, CONTEXT_SIZE,
-                                  self.model.n_feats, self.model.n_feats)
-        self.parse(stcls, eg.c)
-        tokens.set_parse(stcls._sent)
-
-    def __reduce__(self):
-        return (Parser, (self.moves.strings, self.moves, self.model), None, None)
-
-    cdef void predict(self, StateClass stcls, ExampleC* eg) nogil:
-        memset(eg.scores, 0, eg.nr_class * sizeof(weight_t))
-        self.moves.set_valid(eg.is_valid, stcls)
-        fill_context(eg.atoms, stcls)
-        self.model.set_scores(eg.scores, eg.atoms)
-        eg.guess = arg_max_if_true(eg.scores, eg.is_valid, self.model.n_classes)
-
-    cdef void parse(self, StateClass stcls, ExampleC eg) nogil:
+        cdef Pool mem = Pool()
+        cdef ExampleC eg = self.model.allocate(mem)
         while not stcls.is_final():
-            self.predict(stcls, &eg)
-            if not eg.is_valid[eg.guess]:
-                break
-            self.moves.c[eg.guess].do(stcls, self.moves.c[eg.guess].label)
-        self.moves.finalize_state(stcls)
+            self.model.set_features(&eg, stcls)
+            self.moves.set_valid(eg.is_valid, stcls)
+            self.model.set_prediction(&eg)
+            assert eg.is_valid[eg.guess]
+            action = self.moves.c[eg.guess]
+            action.do(stcls, action.label)
+        self.moves.finalize_state(stcls)
+        tokens.set_parse(stcls._sent)

     def train(self, Doc tokens, GoldParse gold):
         self.moves.preprocess_gold(gold)
         cdef StateClass stcls = StateClass.init(tokens.c, tokens.length)
         self.moves.initialize_state(stcls)
-        cdef Example eg = Example(self.model.n_classes, CONTEXT_SIZE,
-                                  self.model.n_feats, self.model.n_feats)
+        cdef Pool mem = Pool()
+        cdef ExampleC eg = self.model.allocate(mem)
         cdef weight_t loss = 0
         words = [w.orth_ for w in tokens]
-        cdef Transition G
+        cdef Transition action
         while not stcls.is_final():
-            memset(eg.c.scores, 0, eg.c.nr_class * sizeof(weight_t))
-            self.moves.set_costs(eg.c.is_valid, eg.c.costs, stcls, gold)
-            fill_context(eg.c.atoms, stcls)
-            self.model.train(eg)
-            G = self.moves.c[eg.c.guess]
-            self.moves.c[eg.c.guess].do(stcls, self.moves.c[eg.c.guess].label)
-            loss += eg.c.loss
+            self.model.set_features(&eg, stcls)
+            self.moves.set_costs(eg.is_valid, eg.costs, stcls, gold)
+            self.model.set_prediction(&eg)
+            self.model.update(&eg)
+            action = self.moves.c[eg.guess]
+            action.do(stcls, action.label)
+            loss += eg.costs[eg.guess]
         return loss

     def step_through(self, Doc doc):
@@ -176,7 +178,10 @@ cdef class StepwiseState:
                 for i in range(self.stcls.length)]

     def predict(self):
-        self.parser.predict(self.stcls, &self.eg.c)
+        self.parser.model.set_features(&self.eg.c, self.stcls)
+        self.parser.moves.set_valid(self.eg.c.is_valid, self.stcls)
+        self.parser.model.set_prediction(&self.eg.c)
         action = self.parser.moves.c[self.eg.c.guess]
         return self.parser.moves.move_name(action.move, action.label)
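The old predict()/parse() pair on Parser collapsed feature extraction, validity masking, and the argmax into one nogil method; after this change every call-site spells out the three steps, as StepwiseState.predict above now does. Schematically, the new greedy loop looks like the following hypothetical helper (a sketch over the new API, mirroring Parser.__call__, not code from this commit):

    def greedy_parse(model, moves, state, mem):
        # One example per parse; each step refills features and rescores.
        eg = model.allocate(mem)
        while not state.is_final():
            model.set_features(eg, state)        # 1. state -> atoms -> features
            moves.set_valid(eg.is_valid, state)  # 2. mask illegal transitions
            model.set_prediction(eg)             # 3. score + argmax over valid
            action = moves.c[eg.guess]
            action.do(state, action.label)
        moves.finalize_state(state)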

View File

@@ -1,9 +1,17 @@
-from ._ml cimport Model
+from thinc.api cimport AveragedPerceptron
+from thinc.api cimport ExampleC

 from .structs cimport TokenC
 from .vocab cimport Vocab

+cdef class TaggerModel(AveragedPerceptron):
+    cdef void set_features(self, ExampleC* eg, const TokenC* tokens, int i) except *
+    cdef void set_costs(self, ExampleC* eg, int gold) except *
+    cdef void update(self, ExampleC* eg) except *
+
+
 cdef class Tagger:
     cdef readonly Vocab vocab
-    cdef readonly Model model
+    cdef readonly TaggerModel model
     cdef public dict freqs

View File

@@ -1,10 +1,12 @@
 import json
 from os import path
 from collections import defaultdict

+from libc.string cimport memset
+from cymem.cymem cimport Pool
 from thinc.typedefs cimport atom_t, weight_t
-from thinc.learner cimport arg_max, arg_max_if_true, arg_max_if_zero
-from thinc.api cimport Example
+from thinc.api cimport Example, ExampleC
+from thinc.features cimport ConjunctionExtracter

 from .typedefs cimport attr_t
 from .tokens.doc cimport Doc
@@ -64,6 +66,44 @@ cpdef enum:
     N_CONTEXT_FIELDS

+cdef class TaggerModel(AveragedPerceptron):
+    def __init__(self, n_classes, templates):
+        AveragedPerceptron.__init__(self, n_classes,
+            ConjunctionExtracter(N_CONTEXT_FIELDS, templates))
+
+    cdef void set_features(self, ExampleC* eg, const TokenC* tokens, int i) except *:
+        _fill_from_token(&eg.atoms[P2_orth], &tokens[i-2])
+        _fill_from_token(&eg.atoms[P1_orth], &tokens[i-1])
+        _fill_from_token(&eg.atoms[W_orth], &tokens[i])
+        _fill_from_token(&eg.atoms[N1_orth], &tokens[i+1])
+        _fill_from_token(&eg.atoms[N2_orth], &tokens[i+2])
+        eg.nr_feat = self.extracter.set_features(eg.features, eg.atoms)
+
+    cdef void update(self, ExampleC* eg) except *:
+        self.updater.update(eg)
+
+
+cdef inline void _fill_from_token(atom_t* context, const TokenC* t) nogil:
+    context[0] = t.lex.lower
+    context[1] = t.lex.cluster
+    context[2] = t.lex.shape
+    context[3] = t.lex.prefix
+    context[4] = t.lex.suffix
+    context[5] = t.tag
+    context[6] = t.lemma
+    if t.lex.flags & (1 << IS_ALPHA):
+        context[7] = 1
+    elif t.lex.flags & (1 << IS_PUNCT):
+        context[7] = 2
+    elif t.lex.flags & (1 << LIKE_URL):
+        context[7] = 3
+    elif t.lex.flags & (1 << LIKE_NUM):
+        context[7] = 4
+    else:
+        context[7] = 0
+
 cdef class Tagger:
     """A part-of-speech tagger for English"""
     @classmethod
@@ -105,7 +145,7 @@ cdef class Tagger:
     @classmethod
     def blank(cls, vocab, templates):
-        model = Model(vocab.morphology.n_tags, templates, model_loc=None)
+        model = TaggerModel(vocab.morphology.n_tags, templates)
         return cls(vocab, model)

     @classmethod
@@ -114,10 +154,12 @@ cdef class Tagger:
             templates = json.loads(open(path.join(data_dir, 'templates.json')))
         else:
             templates = cls.default_templates()
-        model = Model(vocab.morphology.n_tags, templates, data_dir)
+        model = TaggerModel(vocab.morphology.n_tags, templates)
+        if path.exists(path.join(data_dir, 'model')):
+            model.load(path.join(data_dir, 'model'))
         return cls(vocab, model)

-    def __init__(self, Vocab vocab, model):
+    def __init__(self, Vocab vocab, TaggerModel model):
         self.vocab = vocab
         self.model = model
@@ -131,27 +173,6 @@ cdef class Tagger:
     def tag_names(self):
         return self.vocab.morphology.tag_names

-    def __call__(self, Doc tokens):
-        """Apply the tagger, setting the POS tags onto the Doc object.
-
-        Args:
-            tokens (Doc): The tokens to be tagged.
-        """
-        if tokens.length == 0:
-            return 0
-        cdef Example eg = self.model._eg
-        cdef int i
-        for i in range(tokens.length):
-            if tokens.c[i].pos == 0:
-                eg.wipe()
-                fill_atoms(eg.c.atoms, tokens.c, i)
-                self.model(eg)
-                self.vocab.morphology.assign_tag(&tokens.c[i], eg.c.guess)
-        tokens.is_tagged = True
-        tokens._py_tokens = [None] * tokens.length
-
     def __reduce__(self):
         return (self.__class__, (self.vocab, self.model), None, None)
@@ -162,53 +183,45 @@ cdef class Tagger:
         tokens.is_tagged = True
         tokens._py_tokens = [None] * tokens.length

+    def __call__(self, Doc tokens):
+        """Apply the tagger, setting the POS tags onto the Doc object.
+
+        Args:
+            tokens (Doc): The tokens to be tagged.
+        """
+        if tokens.length == 0:
+            return 0
+        cdef Pool mem = Pool()
+        cdef ExampleC eg
+        cdef int i, tag
+        for i in range(tokens.length):
+            if tokens.c[i].pos == 0:
+                eg = self.model.allocate(mem)
+                self.model.set_features(&eg, tokens.c, i)
+                self.model.set_prediction(&eg)
+                self.vocab.morphology.assign_tag(&tokens.c[i], eg.guess)
+        tokens.is_tagged = True
+        tokens._py_tokens = [None] * tokens.length
+
     def train(self, Doc tokens, object gold_tag_strs):
         assert len(tokens) == len(gold_tag_strs)
-        cdef int i
-        cdef int loss
-        cdef const weight_t* scores
-        try:
-            golds = [self.tag_names.index(g) if g is not None else -1 for g in gold_tag_strs]
-        except ValueError:
-            raise ValueError(
-                [g for g in gold_tag_strs if g is not None and g not in self.tag_names])
-        correct = 0
-        cdef Example eg = self.model._eg
+        golds = [self.tag_names.index(g) if g is not None else -1 for g in gold_tag_strs]
+        cdef int correct = 0
+        cdef Pool mem = Pool()
+        cdef ExampleC eg
         for i in range(tokens.length):
-            eg.wipe()
-            fill_atoms(eg.c.atoms, tokens.c, i)
-            self.train(eg)
-            self.vocab.morphology.assign_tag(&tokens.c[i], eg.c.guess)
-            correct += eg.c.cost == 0
+            eg = self.model.allocate(mem)
+            self.model.set_features(&eg, tokens.c, i)
+            self.model.set_costs(&eg, golds[i])
+            self.model.set_prediction(&eg)
+            self.model.update(&eg)
+            self.vocab.morphology.assign_tag(&tokens.c[i], eg.guess)
+            correct += eg.cost == 0
             self.freqs[TAG][tokens.c[i].tag] += 1
-        tokens.is_tagged = True
-        tokens._py_tokens = [None] * tokens.length
         return correct

-
-cdef inline void fill_atoms(atom_t* atoms, const TokenC* tokens, int i) nogil:
-    _fill_from_token(&atoms[P2_orth], &tokens[i-2])
-    _fill_from_token(&atoms[P1_orth], &tokens[i-1])
-    _fill_from_token(&atoms[W_orth], &tokens[i])
-    _fill_from_token(&atoms[N1_orth], &tokens[i+1])
-    _fill_from_token(&atoms[N2_orth], &tokens[i+2])
-
-cdef inline void _fill_from_token(atom_t* context, const TokenC* t) nogil:
-    context[0] = t.lex.lower
-    context[1] = t.lex.cluster
-    context[2] = t.lex.shape
-    context[3] = t.lex.prefix
-    context[4] = t.lex.suffix
-    context[5] = t.tag
-    context[6] = t.lemma
-    if t.lex.flags & (1 << IS_ALPHA):
-        context[7] = 1
-    elif t.lex.flags & (1 << IS_PUNCT):
-        context[7] = 2
-    elif t.lex.flags & (1 << LIKE_URL):
-        context[7] = 3
-    elif t.lex.flags & (1 << LIKE_NUM):
-        context[7] = 4
-    else:
-        context[7] = 0
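Since the models no longer take a data directory in their constructor, construction and weight-loading are now two separate steps everywhere (see Tagger.from_dir above and Parser.from_dir earlier). A hedged sketch of the pattern; load_tagger_model and its arguments are illustrative, not part of this commit:

    from os import path

    def load_tagger_model(TaggerModel, n_tags, templates, data_dir):
        # Build a blank model, then load weights only if they exist on disk;
        # a blank model is still usable for training from scratch.
        model = TaggerModel(n_tags, templates)
        model_loc = path.join(data_dir, 'model')
        if path.exists(model_loc):
            model.load(model_loc)
        return model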

View File

@@ -11,7 +11,6 @@ from spacy.strings import StringStore
 from spacy.vocab import Vocab
 from spacy.tokenizer import Tokenizer
 from spacy.syntax.arc_eager import ArcEager
-from spacy._ml import Model
 from spacy.tagger import Tagger
 from spacy.syntax.parser import Parser
 from spacy.matcher import Matcher

View File

@@ -12,16 +12,13 @@ from spacy.strings import StringStore
 from spacy.vocab import Vocab
 from spacy.tokenizer import Tokenizer
 from spacy.syntax.arc_eager import ArcEager
-from spacy._ml import Model
 from spacy.tagger import Tagger
-from spacy.syntax.parser import Parser
+from spacy.syntax.parser import Parser, ParserModel
 from spacy.matcher import Matcher
 from spacy.syntax.parser import get_templates
 from spacy.en import English
-from thinc.learner import LinearModel

 class TestLoadVocab(unittest.TestCase):
     def test_load(self):
@@ -54,7 +51,6 @@ class TestLoadParser(unittest.TestCase):
         if path.exists(path.join(data_dir, 'deps')):
             parser = Parser.from_dir(path.join(data_dir, 'deps'), vocab.strings, ArcEager)

-    def test_load_careful(self):
         config_data = {"labels": {"0": {"": True}, "1": {"": True}, "2": {"cc": True, "agent": True, "ccomp": True, "prt": True, "meta": True, "nsubjpass": True, "csubj": True, "conj": True, "dobj": True, "neg": True, "csubjpass": True, "mark": True, "auxpass": True, "advcl": True, "aux": True, "ROOT": True, "prep": True, "parataxis": True, "xcomp": True, "nsubj": True, "nummod": True, "advmod": True, "punct": True, "relcl": True, "quantmod": True, "acomp": True, "compound": True, "pcomp": True, "intj": True, "poss": True, "npadvmod": True, "case": True, "attr": True, "dep": True, "appos": True, "det": True, "nmod": True, "amod": True, "dative": True, "pobj": True, "expl": True, "predet": True, "preconj": True, "oprd": True, "acl": True}, "3": {"cc": True, "agent": True, "ccomp": True, "prt": True, "meta": True, "nsubjpass": True, "csubj": True, "conj": True, "acl": True, "poss": True, "neg": True, "mark": True, "auxpass": True, "advcl": True, "aux": True, "amod": True, "ROOT": True, "prep": True, "parataxis": True, "xcomp": True, "nsubj": True, "nummod": True, "advmod": True, "punct": True, "quantmod": True, "acomp": True, "pcomp": True, "intj": True, "relcl": True, "npadvmod": True, "case": True, "attr": True, "dep": True, "appos": True, "det": True, "nmod": True, "dobj": True, "dative": True, "pobj": True, "iobj": True, "expl": True, "predet": True, "preconj": True, "oprd": True}, "4": {"ROOT": True}}, "seed": 0, "features": "basic", "beam_width": 1}
         data_dir = English.default_data_dir()
@@ -63,20 +59,11 @@ class TestLoadParser(unittest.TestCase):
         moves = ArcEager(vocab.strings, config_data['labels'])
         templates = get_templates(config_data['features'])
-        model = Model(moves.n_moves, templates, path.join(data_dir, 'deps'))
+        model = ParserModel(moves.n_moves, templates)
+        model.load(path.join(data_dir, 'deps', 'model'))
         parser = Parser(vocab.strings, moves, model)

-    def test_thinc_load(self):
-        data_dir = English.default_data_dir()
-        model_loc = path.join(data_dir, 'deps', 'model')
-
-        # n classes. moves.n_moves above
-        # n features. len(templates) + 1 above
-        if path.exists(model_loc):
-            model = LinearModel(92, 116)
-            model.load(model_loc)

 if __name__ == '__main__':
     unittest.main()