Add separate noise vs orth level to train CLI

Adriane Boyd 2019-08-29 09:10:35 +02:00
parent 7d6d438566
commit f3906950d3

spacy/cli/train.py

@@ -65,6 +65,7 @@ from .. import about
         str,
     ),
     noise_level=("Amount of corruption for data augmentation", "option", "nl", float),
+    orth_variant_level=("Amount of orthography variation for data augmentation", "option", "ovl", float),
     eval_beam_widths=("Beam widths to evaluate, e.g. 4,8", "option", "bw", str),
     gold_preproc=("Use gold preprocessing", "flag", "G", bool),
     learn_tokens=("Make parser learn gold-standard tokenization", "flag", "T", bool),
@@ -90,6 +91,7 @@ def train(
     parser_multitasks="",
     entity_multitasks="",
     noise_level=0.0,
+    orth_variant_level=0.0,
     eval_beam_widths="",
     gold_preproc=False,
     learn_tokens=False,
@@ -240,7 +242,7 @@ def train(
     best_score = 0.0
     for i in range(n_iter):
         train_docs = corpus.train_docs(
-            nlp, orth_variant_level=noise_level, gold_preproc=gold_preproc, max_length=0
+            nlp, noise_level=noise_level, orth_variant_level=orth_variant_level, gold_preproc=gold_preproc, max_length=0
         )
         if raw_text:
             random.shuffle(raw_text)
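
The net effect of the change is that the two augmentation knobs are now passed through independently: noise_level controls character-level corruption, while orth_variant_level controls orthographic variation (previously the -nl value was forwarded as orth_variant_level). Below is a minimal sketch, not part of the commit, of driving GoldCorpus.train_docs with both levels set separately; the file paths and concrete values are hypothetical, and the keyword arguments mirror the call in the final hunk.

    # Sketch: iterate augmented training docs with both augmentation knobs
    # set independently (keyword arguments as in the updated call above).
    import spacy
    from spacy.gold import GoldCorpus

    nlp = spacy.blank("en")
    corpus = GoldCorpus("train.json", "dev.json")  # hypothetical paths

    train_docs = corpus.train_docs(
        nlp,
        noise_level=0.1,         # character-level corruption (-nl)
        orth_variant_level=0.3,  # orthographic variants (-ovl)
        gold_preproc=False,
        max_length=0,
    )
    for doc, gold in train_docs:
        pass  # feed (doc, gold) pairs into nlp.update(...) during training

From the spacy train command line, the new level is exposed via the -ovl shorthand declared in the annotations above, alongside the existing -nl option for noise_level.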