Repository: https://github.com/explosion/spaCy.git
Commit 8be231d490 (parent b3b180010b)

Tmp. Working on NN NER.
@@ -91,7 +91,7 @@ def train(Language, gold_tuples, model_dir, dev_loc, n_iter=15, feat_set=u'basic
 
     Config.write(dep_model_dir, 'config', feat_set=feat_set, seed=seed,
                  labels=ArcEager.get_labels(gold_tuples),
-                 rho=0.0, eta=1.0, mu=0.9, noise=0.0,
+                 rho=1e-5, eta=1.0, mu=0.9, noise=0.0,
                  beam_width=beam_width,projectivize=pseudoprojective)
     #feat_set, slots = get_templates('neural')
     #vector_widths = [10, 10, 10]
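The only functional change in this hunk is rho going from 0.0 to 1e-5 in the dependency parser's config. In the thinc-style configs of this era, rho is typically the L2 regularization penalty, with eta the learning rate and mu the momentum, so the edit appears to switch on a small amount of weight decay; that reading of the parameters is an assumption, not something stated in the diff. A minimal sketch of an update rule with those roles:

# Sketch only: assumes rho is an L2 penalty, eta a learning rate, mu momentum.
def momentum_l2_update(weight, grad, velocity, eta=1.0, mu=0.9, rho=1e-5):
    """One SGD step with momentum and an L2 penalty on a single weight."""
    grad = grad + rho * weight             # L2 term nudges the weight toward zero
    velocity = mu * velocity - eta * grad  # momentum accumulates past gradients
    return weight + velocity, velocity

# With rho=0.0 the weight is never shrunk; with rho=1e-5 each step applies a
# tiny pull toward zero on top of the data gradient.
w, v = 1.0, 0.0
for _ in range(3):
    w, v = momentum_l2_update(w, grad=0.1, velocity=v)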
@@ -112,13 +112,17 @@ def train(Language, gold_tuples, model_dir, dev_loc, n_iter=15, feat_set=u'basic
 
     Config.write(ner_model_dir, 'config', feat_set='ner', seed=seed,
                  labels=BiluoPushDown.get_labels(gold_tuples),
-                 beam_width=0, rho=0.0, eta=1.0, mu=0.9, noise=0.0)
+                 beam_width=beam_width, rho=1e-8, eta=1.0, mu=0.9, noise=0.0)
 
     if n_sents > 0:
         gold_tuples = gold_tuples[:n_sents]
     micro_eval = gold_tuples[:50]
     nlp = Language(data_dir=model_dir, tagger=False, parser=False, entity=False)
     nlp.tagger = Tagger.blank(nlp.vocab, Tagger.default_templates())
-    nlp.parser = Parser.from_dir(dep_model_dir, nlp.vocab.strings, ArcEager)
-    nlp.entity = Parser.from_dir(ner_model_dir, nlp.vocab.strings, BiluoPushDown)
+    if beam_width >= 2:
+        nlp.parser = Parser.from_dir(dep_model_dir, nlp.vocab.strings, ArcEager)
+        nlp.entity = BeamParser.from_dir(ner_model_dir, nlp.vocab.strings, BiluoPushDown)
+    else:
+        nlp.parser = Parser.from_dir(dep_model_dir, nlp.vocab.strings, ArcEager)
+        nlp.entity = Parser.from_dir(ner_model_dir, nlp.vocab.strings, BiluoPushDown)
     print(nlp.parser.model.widths)
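Two things change here: the NER config now takes the command-line beam width (and a smaller rho of 1e-8) instead of hard-coding beam_width=0, and model loading branches on that width, using BeamParser for the entity recognizer when the beam is 2 or wider and the greedy Parser otherwise. The toy sketch below only illustrates why a beam of width 1 collapses to greedy decoding; the scorer and action names are invented for illustration and none of it is spaCy API.

# Illustration only: beam search over action sequences, where beam_width == 1
# reproduces greedy decoding. score_actions(history) -> {action: score}.
import heapq

def decode(score_actions, n_steps, beam_width=1):
    beam = [(0.0, [])]                    # (total score, action history)
    for _ in range(n_steps):
        candidates = []
        for total, history in beam:
            for action, score in score_actions(history).items():
                candidates.append((total + score, history + [action]))
        # Keep the k best partial sequences; k == 1 is greedy search.
        beam = heapq.nlargest(beam_width, candidates, key=lambda c: c[0])
    return max(beam, key=lambda c: c[0])

def toy_scorer(history):
    # Choosing 'B' first looks slightly better than 'O', but 'O' pays off later.
    if not history:
        return {'B': 1.0, 'O': 0.9}
    return {'I': 2.0} if history[0] == 'O' else {'I': 0.5}

print(decode(toy_scorer, n_steps=3, beam_width=1))  # greedy sticks with ['B', 'I', 'I']
print(decode(toy_scorer, n_steps=3, beam_width=4))  # wider beam recovers the higher-scoring ['O', 'I', 'I']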
@@ -178,7 +182,7 @@ def _train_epoch(nlp, gold_tuples, eg_seen, itn, dev_loc, micro_eval,
                                        nlp.parser.model.nr_active_feat,
                                        nlp.entity.model.nr_active_feat))
     loss = 0
-    nlp.parser.model.learn_rate *= 0.99
+    #nlp.parser.model.learn_rate *= 0.99
     scorer = score_file(nlp, dev_loc)
     print('D:\t%d\t%.3f\t%.3f\t%.3f\t%.3f' % (loss, scorer.uas, scorer.ents_f,
                                               scorer.tags_acc, scorer.token_acc))
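This hunk comments out the per-epoch learning-rate decay, so learn_rate now stays fixed across epochs rather than being multiplied by 0.99 after each one. For reference, the schedule being disabled is plain exponential decay; a self-contained sketch of that schedule (not spaCy code, and the starting rate is just an example):

# Exponential decay as the commented-out line would apply it: after every
# epoch the rate is multiplied by a constant factor (0.99 here).
def decayed_rate(initial_rate, epoch, factor=0.99):
    return initial_rate * factor ** epoch

# e.g. epoch 0 -> 1.0, epoch 1 -> 0.99, epoch 10 -> roughly 0.904
rates = [decayed_rate(1.0, epoch) for epoch in range(15)]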
@@ -271,6 +275,7 @@ def write_parses(Language, dev_loc, model_dir, out_loc):
     train_loc=("Location of training file or directory"),
     dev_loc=("Location of development file or directory"),
     model_dir=("Location of output model directory",),
+    beam_width=("Parser and NER beam width", "option", "k", int),
     eval_only=("Skip training, and only evaluate", "flag", "e", bool),
     corruption_level=("Amount of noise to add to training data", "option", "c", float),
     gold_preproc=("Use gold-standard sentence boundaries in training?", "flag", "g", bool),
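The added CLI option follows the plac annotation convention already used in this block: each tuple is (help text, kind, abbreviation, type), so beam_width becomes an integer option with the short flag -k, defaulting to whatever main() declares. A minimal standalone example of the same pattern (the script below is illustrative, not the training script itself):

# Minimal plac example mirroring the annotation added above.
# Run as e.g.:  python demo.py -k 8
import plac

@plac.annotations(
    beam_width=("Parser and NER beam width", "option", "k", int),
)
def main(beam_width=1):
    print("beam width:", beam_width)

if __name__ == '__main__':
    plac.call(main)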
@@ -282,7 +287,8 @@ def write_parses(Language, dev_loc, model_dir, out_loc):
     pseudoprojective=("Use pseudo-projective parsing", "flag", "p", bool),
 )
 def main(language, train_loc, dev_loc, model_dir, n_sents=0, n_iter=15, out_loc="", verbose=False,
-         debug=False, corruption_level=0.0, gold_preproc=False, eval_only=False, pseudoprojective=False):
+         debug=False, corruption_level=0.0, beam_width=1,
+         gold_preproc=False, eval_only=False, pseudoprojective=False):
     lang = spacy.util.get_lang_class(language)
 
     if not eval_only:
@@ -291,7 +297,8 @@ def main(language, train_loc, dev_loc, model_dir, n_sents=0, n_iter=15, out_loc=
               feat_set='basic', #'neural' if not debug else 'debug',
               gold_preproc=gold_preproc, n_sents=n_sents,
               corruption_level=corruption_level, n_iter=n_iter,
-              verbose=verbose,pseudoprojective=pseudoprojective)
+              verbose=verbose, pseudoprojective=pseudoprojective,
+              beam_width=beam_width)
     if out_loc:
         write_parses(lang, dev_loc, model_dir, out_loc)
     print(model_dir)