Use increasing beam_update_prob in ud-train

This commit is contained in:
Matthew Honnibal 2018-05-16 23:21:53 +02:00
commit b9e415a5f8
2 changed files with 3 additions and 1 deletion

View File

@@ -196,6 +196,7 @@ def setup_package():
'plac<1.0.0,>=0.9.6',
'pathlib',
'ujson>=1.35',
'regex==2017.4.5',
'dill>=0.2,<0.3'],
setup_requires=['wheel'],
classifiers=[

View File

@@ -370,7 +370,7 @@ def main(ud_dir, parses_dir, config, corpus, limit=0, use_gpu=-1, vectors_dir=No
optimizer = initialize_pipeline(nlp, docs, golds, config, use_gpu)
batch_sizes = compounding(config.batch_size//10, config.batch_size, 1.001)
nlp.parser.cfg['beam_update_prob'] = 1.0
beam_prob = compounding(0.2, 0.8, 1.001)
for i in range(config.nr_epoch):
docs, golds = read_data(nlp, paths.train.conllu.open(), paths.train.text.open(),
max_doc_length=config.max_doc_length, limit=limit,
@@ -385,6 +385,7 @@ def main(ud_dir, parses_dir, config, corpus, limit=0, use_gpu=-1, vectors_dir=No
for batch in batches:
batch_docs, batch_gold = zip(*batch)
pbar.update(sum(len(doc) for doc in batch_docs))
nlp.parser.cfg['beam_update_prob'] = next(beam_prob)
nlp.update(batch_docs, batch_gold, sgd=optimizer,
drop=config.dropout, losses=losses)