From 0703f5986bb2347f3332cdc3851246c1b19ca4ba Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Fri, 15 Mar 2019 00:48:39 +0100
Subject: [PATCH 01/11] Remove hack from beam

---
 spacy/syntax/_beam_utils.pyx | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/spacy/syntax/_beam_utils.pyx b/spacy/syntax/_beam_utils.pyx
index f06d54d9d..83137a4c4 100644
--- a/spacy/syntax/_beam_utils.pyx
+++ b/spacy/syntax/_beam_utils.pyx
@@ -209,10 +209,6 @@ def update_beam(TransitionSystem moves, int nr_feature, int max_steps,
     # Track the "maximum violation", to use in the update.
     for i, violn in enumerate(violns):
         violn.check_crf(pbeam[i], gbeam[i])
-        # Use 'early update' if best gold is way out of contention.
-        if pbeam[i].loss > 0 and pbeam[i].min_score > (gbeam[i].score * 5.00):
-            pbeam.dones[i] = True
-            gbeam.dones[i] = True
     histories = []
     losses = []
     for violn in violns:
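
What is removed here is an "early update" heuristic: when the predicted beam
already had a loss and even its lowest-scoring candidate (`min_score`) was more
than five times the best gold beam's score, both beams were marked done so the
update happened on the truncated prefix. With the hack gone, the update relies
on the "maximum violation" bookkeeping alone: `violn.check_crf(pbeam[i],
gbeam[i])` records the step at which the prediction most outscores the gold
path, and the loss is computed there. A rough plain-Python sketch of that
bookkeeping, with hypothetical attribute names (the real class lives in thinc):

    class MaxViolation(object):
        # Track the step where the prediction most outscores gold (a sketch,
        # not thinc's implementation).
        def __init__(self):
            self.delta = -1.0
            self.pred_hist = []
            self.gold_hist = []

        def check_crf(self, pred_beam, gold_beam):
            # The violation is the margin by which the predicted beam's best
            # candidate beats the gold beam's best candidate.
            delta = pred_beam.score - gold_beam.score
            if delta > self.delta:
                self.delta = delta
                self.pred_hist = list(pred_beam.histories)
                self.gold_hist = list(gold_beam.histories)

The update is then taken from the histories recorded at the worst-violation
step, rather than from wherever the search happened to stop.
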
From f762c36e618ea8c97fbe4fea0838f04b6beaf7da Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Fri, 15 Mar 2019 15:18:28 +0100
Subject: [PATCH 02/11] Evaluate accuracy at multiple beam widths

---
 spacy/cli/train.py | 99 +++++++++++++++++++++++++---------------------
 1 file changed, 55 insertions(+), 44 deletions(-)

diff --git a/spacy/cli/train.py b/spacy/cli/train.py
index c6ada957f..3411bdc68 100644
--- a/spacy/cli/train.py
+++ b/spacy/cli/train.py
@@ -200,9 +200,9 @@ def train(
         msg.text("Loaded pretrained tok2vec for: {}".format(components))
 
     # fmt: off
-    row_head = ("Itn", "Dep Loss", "NER Loss", "UAS", "NER P", "NER R", "NER F", "Tag %", "Token %", "CPU WPS", "GPU WPS")
+    row_head = ("Itn", "Beam Width", "Dep Loss", "NER Loss", "UAS", "NER P", "NER R", "NER F", "Tag %", "Token %", "CPU WPS", "GPU WPS")
     row_settings = {
-        "widths": (3, 10, 10, 7, 7, 7, 7, 7, 7, 7, 7),
+        "widths": (3, 10, 10, 10, 7, 7, 7, 7, 7, 7, 7, 7),
         "aligns": tuple(["r" for i in row_head]),
         "spacing": 2
     }
@@ -247,51 +247,61 @@ def train(
             epoch_model_path = output_path / ("model%d" % i)
             nlp.to_disk(epoch_model_path)
             nlp_loaded = util.load_model_from_path(epoch_model_path)
-            dev_docs = list(corpus.dev_docs(nlp_loaded, gold_preproc=gold_preproc))
-            nwords = sum(len(doc_gold[0]) for doc_gold in dev_docs)
-            start_time = timer()
-            scorer = nlp_loaded.evaluate(dev_docs, debug)
-            end_time = timer()
-            if use_gpu < 0:
-                gpu_wps = None
-                cpu_wps = nwords / (end_time - start_time)
-            else:
-                gpu_wps = nwords / (end_time - start_time)
-                with Model.use_device("cpu"):
-                    nlp_loaded = util.load_model_from_path(epoch_model_path)
-                    dev_docs = list(
-                        corpus.dev_docs(nlp_loaded, gold_preproc=gold_preproc)
-                    )
-                    start_time = timer()
-                    scorer = nlp_loaded.evaluate(dev_docs)
-                    end_time = timer()
-                    cpu_wps = nwords / (end_time - start_time)
-            acc_loc = output_path / ("model%d" % i) / "accuracy.json"
-            srsly.write_json(acc_loc, scorer.scores)
+            for beam_width in [1, 4, 16, 128]:
+                for name, component in nlp_loaded.pipeline:
+                    if hasattr(component, "cfg"):
+                        component.cfg["beam_width"] = beam_width
+                dev_docs = list(corpus.dev_docs(nlp_loaded, gold_preproc=gold_preproc))
+                nwords = sum(len(doc_gold[0]) for doc_gold in dev_docs)
+                start_time = timer()
+                scorer = nlp_loaded.evaluate(dev_docs, debug)
+                end_time = timer()
+                if use_gpu < 0:
+                    gpu_wps = None
+                    cpu_wps = nwords / (end_time - start_time)
+                else:
+                    gpu_wps = nwords / (end_time - start_time)
+                    with Model.use_device("cpu"):
+                        nlp_loaded = util.load_model_from_path(epoch_model_path)
+                        nlp_loaded.parser.cfg["beam_width"]
+                        dev_docs = list(
+                            corpus.dev_docs(nlp_loaded, gold_preproc=gold_preproc)
+                        )
+                        start_time = timer()
+                        scorer = nlp_loaded.evaluate(dev_docs)
+                        end_time = timer()
+                        cpu_wps = nwords / (end_time - start_time)
+                acc_loc = output_path / ("model%d" % i) / "accuracy.json"
+                srsly.write_json(acc_loc, scorer.scores)
 
-            # Update model meta.json
-            meta["lang"] = nlp.lang
-            meta["pipeline"] = nlp.pipe_names
-            meta["spacy_version"] = ">=%s" % about.__version__
-            meta["accuracy"] = scorer.scores
-            meta["speed"] = {"nwords": nwords, "cpu": cpu_wps, "gpu": gpu_wps}
-            meta["vectors"] = {
-                "width": nlp.vocab.vectors_length,
-                "vectors": len(nlp.vocab.vectors),
-                "keys": nlp.vocab.vectors.n_keys,
-                "name": nlp.vocab.vectors.name
-            }
-            meta.setdefault("name", "model%d" % i)
-            meta.setdefault("version", version)
-            meta_loc = output_path / ("model%d" % i) / "meta.json"
-            srsly.write_json(meta_loc, meta)
-
-            util.set_env_log(verbose)
-
-            progress = _get_progress(
-                i, losses, scorer.scores, cpu_wps=cpu_wps, gpu_wps=gpu_wps
-            )
-            msg.row(progress, **row_settings)
+                # Update model meta.json
+                meta["lang"] = nlp.lang
+                meta["pipeline"] = nlp.pipe_names
+                meta["spacy_version"] = ">=%s" % about.__version__
+                if beam_width == 1:
+                    meta["speed"] = {"nwords": nwords, "cpu": cpu_wps, "gpu": gpu_wps}
+                    meta["accuracy"] = scorer.scores
+                else:
+                    meta.setdefault("beam_accuracy", {})
+                    meta.setdefault("beam_speed", {})
+                    meta["beam_accuracy"][beam_width] = scorer.scores
+                    meta["beam_speed"][beam_width] = {"nwords": nwords, "cpu": cpu_wps, "gpu": gpu_wps}
+                meta["vectors"] = {
+                    "width": nlp.vocab.vectors_length,
+                    "vectors": len(nlp.vocab.vectors),
+                    "keys": nlp.vocab.vectors.n_keys,
+                    "name": nlp.vocab.vectors.name
+                }
+                meta.setdefault("name", "model%d" % i)
+                meta.setdefault("version", version)
+                meta_loc = output_path / ("model%d" % i) / "meta.json"
+                srsly.write_json(meta_loc, meta)
+                util.set_env_log(verbose)
+
+                progress = _get_progress(
+                    i, beam_width, losses, scorer.scores, cpu_wps=cpu_wps, gpu_wps=gpu_wps
+                )
+                msg.row(progress, **row_settings)
     finally:
         with nlp.use_params(optimizer.averages):
             final_model_path = output_path / "model-final"
@@ -377,7 +387,7 @@ def _get_metrics(component):
     return ("token_acc",)
 
 
-def _get_progress(itn, losses, dev_scores, cpu_wps=0.0, gpu_wps=0.0):
+def _get_progress(itn, beam_width, losses, dev_scores, cpu_wps=0.0, gpu_wps=0.0):
     scores = {}
     for col in [
         "dep_loss",
@@ -400,6 +410,7 @@ def _get_progress(itn, losses, dev_scores, cpu_wps=0.0, gpu_wps=0.0):
     scores["gpu_wps"] = gpu_wps or 0.0
     return [
         itn,
+        beam_width,
         "{:.3f}".format(scores["dep_loss"]),
         "{:.3f}".format(scores["ner_loss"]),
         "{:.3f}".format(scores["uas"]),
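
Two things are worth noting in the patch above. First, inside the CPU
re-evaluation branch, `nlp_loaded.parser.cfg["beam_width"]` is a bare
expression with no effect — presumably it was meant to assign `beam_width` to
the freshly reloaded model, which otherwise falls back to whatever width it
was saved with. Second, the meta.json layout now splits greedy and beam
results. A sketch of the resulting shape (keys from the patch, values elided
with Ellipsis):

    # Shape of model%d/meta.json after this patch. Width 1 keeps the
    # historical keys; other widths are filed under beam_accuracy/beam_speed,
    # keyed by width.
    meta = {
        "accuracy": ...,
        "speed": {"nwords": ..., "cpu": ..., "gpu": ...},
        "beam_accuracy": {4: ..., 16: ..., 128: ...},
        "beam_speed": {4: ..., 16: ..., 128: ...},
    }

This keeps existing consumers of `accuracy` and `speed` working while the
sweep data accumulates alongside them.
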
From ad56641324651b26978d00c833e4ef0c634a1df1 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Fri, 15 Mar 2019 15:20:09 +0100
Subject: [PATCH 03/11] Fix Language.evaluate

---
 spacy/language.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/spacy/language.py b/spacy/language.py
index 6432f3e98..4f1e59433 100644
--- a/spacy/language.py
+++ b/spacy/language.py
@@ -597,6 +597,8 @@ class Language(object):
     ):
         if scorer is None:
             scorer = Scorer()
+        if component_cfg is None:
+            component_cfg = {}
         docs, golds = zip(*docs_golds)
         docs = list(docs)
         golds = list(golds)

From b94b2b11682be02dec1fd3e334a96f10be9f4769 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Fri, 15 Mar 2019 15:20:28 +0100
Subject: [PATCH 04/11] Export hash_state from beam_utils

---
 spacy/syntax/_beam_utils.pxd | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/spacy/syntax/_beam_utils.pxd b/spacy/syntax/_beam_utils.pxd
index 7bae17558..36b0c05da 100644
--- a/spacy/syntax/_beam_utils.pxd
+++ b/spacy/syntax/_beam_utils.pxd
@@ -1,6 +1,9 @@
-from thinc.typedefs cimport class_t
+from thinc.typedefs cimport class_t, hash_t
 
 # These are passed as callbacks to thinc.search.Beam
 cdef int transition_state(void* _dest, void* _src, class_t clas, void* _moves) except -1
 
 cdef int check_final_state(void* _state, void* extra_args) except -1
+
+
+cdef hash_t hash_state(void* _state, void* _) except 0
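
The point of exporting `hash_state` is beam merging: two candidates that
arrive at an identical parser state are interchangeable going forward, so the
search should keep only the better-scored one instead of spending two beam
slots on duplicates. The real callback is a C-level function handed to
`thinc.search.Beam` (patch 6 below wires it in); in outline, the behaviour it
enables looks like this plain-Python sketch, where `candidates` is a
hypothetical list of (score, state) pairs rather than thinc's actual API:

    # Hash-based beam merging, sketched. hash_state maps a parser state to a
    # key that is equal for equivalent states.
    def merge_equivalent(candidates, hash_state):
        best = {}
        for score, state in candidates:
            key = hash_state(state)
            if key not in best or score > best[key][0]:
                best[key] = (score, state)  # keep the best path into each state
        return sorted(best.values(), key=lambda c: c[0], reverse=True)

The `except 0` in the declaration follows the usual Cython convention of
reserving one return value to signal that a C-level function raised.
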
From 693c8934e84c37f63e8e64dba37c7c3ff4b61583 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Fri, 15 Mar 2019 15:22:16 +0100
Subject: [PATCH 05/11] Normalize over all actions in parser, not just valid ones

---
 spacy/syntax/_parser_model.pyx | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/spacy/syntax/_parser_model.pyx b/spacy/syntax/_parser_model.pyx
index f664e6a2c..841e33432 100644
--- a/spacy/syntax/_parser_model.pyx
+++ b/spacy/syntax/_parser_model.pyx
@@ -156,7 +156,7 @@ cdef void cpu_log_loss(float* d_scores,
     """Do multi-label log loss"""
     cdef double max_, gmax, Z, gZ
     best = arg_max_if_gold(scores, costs, is_valid, O)
-    guess = arg_max_if_valid(scores, is_valid, O)
+    guess = Vec.arg_max(scores, O)
     if best == -1 or guess == -1:
         # These shouldn't happen, but if they do, we want to make sure we don't
         # cause an OOB access.
@@ -166,14 +166,11 @@
     max_ = scores[guess]
     gmax = scores[best]
     for i in range(O):
-        if is_valid[i]:
-            Z += exp(scores[i] - max_)
-            if costs[i] <= costs[best]:
-                gZ += exp(scores[i] - gmax)
+        Z += exp(scores[i] - max_)
+        if costs[i] <= costs[best]:
+            gZ += exp(scores[i] - gmax)
     for i in range(O):
-        if not is_valid[i]:
-            d_scores[i] = 0.
-        elif costs[i] <= costs[best]:
+        if costs[i] <= costs[best]:
             d_scores[i] = (exp(scores[i]-max_) / Z) - (exp(scores[i]-gmax)/gZ)
         else:
             d_scores[i] = exp(scores[i]-max_) / Z
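
The change above swaps a softmax over the valid actions for a softmax over all
O actions, on both the model side and the gold side. Writing s_i for the score
of action i, c_i for its cost, and c_best for the cost of the best
gold-compatible action, the gradient the loop computes is, in LaTeX (the
max_/gmax shifts in the code are only numerical stabilisers and cancel out):

    \frac{\partial L}{\partial s_i}
        = \frac{e^{s_i}}{\sum_{j=1}^{O} e^{s_j}}
        - \mathbb{1}\bigl[c_i \le c_{\mathrm{best}}\bigr]
          \cdot \frac{e^{s_i}}{\sum_{j \,:\, c_j \le c_{\mathrm{best}}} e^{s_j}}

That is, the full distribution minus the distribution renormalized over the
gold-compatible actions. Invalid actions now receive a gradient pushing their
scores down, where previously their d_scores were simply zeroed out.
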
From b13b2aeb541d88d2eebced5399bda4d2ad8fc672 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Fri, 15 Mar 2019 15:22:38 +0100
Subject: [PATCH 06/11] Use hash_state in beam

---
 spacy/syntax/_beam_utils.pyx | 2 +-
 spacy/syntax/nn_parser.pyx   | 4 +++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/spacy/syntax/_beam_utils.pyx b/spacy/syntax/_beam_utils.pyx
index 83137a4c4..dc482f278 100644
--- a/spacy/syntax/_beam_utils.pyx
+++ b/spacy/syntax/_beam_utils.pyx
@@ -96,7 +96,7 @@ cdef class ParserBeam(object):
             self._set_scores(beam, scores[i])
             if self.golds is not None:
                 self._set_costs(beam, self.golds[i], follow_gold=follow_gold)
-            beam.advance(transition_state, NULL, self.moves.c)
+            beam.advance(transition_state, hash_state, self.moves.c)
             beam.check_done(check_final_state, NULL)
             # This handles the non-monotonic stuff for the parser.
             if beam.is_done and self.golds is not None:
diff --git a/spacy/syntax/nn_parser.pyx b/spacy/syntax/nn_parser.pyx
index cbeef756d..c140cc4f3 100644
--- a/spacy/syntax/nn_parser.pyx
+++ b/spacy/syntax/nn_parser.pyx
@@ -119,6 +119,8 @@ cdef class Parser:
             cfg['beam_width'] = util.env_opt('beam_width', 1)
         if 'beam_density' not in cfg:
             cfg['beam_density'] = util.env_opt('beam_density', 0.0)
+        if 'beam_update_prob' not in cfg:
+            cfg['beam_update_prob'] = util.env_opt('beam_update_prob', 1.0)
         cfg.setdefault('cnn_maxout_pieces', 3)
         self.cfg = cfg
         self.model = model
@@ -383,7 +385,7 @@ cdef class Parser:
                 self.moves.set_valid(beam.is_valid[i], state)
             memcpy(beam.scores[i], c_scores, scores.shape[1] * sizeof(float))
             c_scores += scores.shape[1]
-        beam.advance(_beam_utils.transition_state, NULL, self.moves.c)
+        beam.advance(_beam_utils.transition_state, _beam_utils.hash_state, self.moves.c)
         beam.check_done(_beam_utils.check_final_state, NULL)
         return [b for b in beams if not b.is_done]

From daa8c3787a61ebbb5786b6496aa4ab3f15c83f5c Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Sat, 16 Mar 2019 15:02:39 +0100
Subject: [PATCH 07/11] Add eval_beam_widths argument to spacy train

---
 spacy/cli/train.py | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/spacy/cli/train.py b/spacy/cli/train.py
index 3411bdc68..e1d5cdefc 100644
--- a/spacy/cli/train.py
+++ b/spacy/cli/train.py
@@ -58,6 +58,7 @@ from .. import about
         str,
     ),
     noise_level=("Amount of corruption for data augmentation", "option", "nl", float),
+    eval_beam_widths=("Beam widths to evaluate, e.g. 4,8", "option", "bw", str),
    gold_preproc=("Use gold preprocessing", "flag", "G", bool),
    learn_tokens=("Make parser learn gold-standard tokenization", "flag", "T", bool),
    verbose=("Display more information for debug", "flag", "VV", bool),
@@ -81,6 +82,7 @@ def train(
     parser_multitasks="",
     entity_multitasks="",
     noise_level=0.0,
+    eval_beam_widths="",
     gold_preproc=False,
     learn_tokens=False,
     verbose=False,
@@ -134,6 +136,14 @@ def train(
         util.env_opt("batch_compound", 1.001),
     )
 
+    if not eval_beam_widths:
+        eval_beam_widths = [1]
+    else:
+        eval_beam_widths = [int(bw) for bw in eval_beam_widths.split(",")]
+        if 1 not in eval_beam_widths:
+            eval_beam_widths.append(1)
+        eval_beam_widths.sort()
+
     # Set up the base model and pipeline. If a base model is specified, load
     # the model and make sure the pipeline matches the pipeline setting. If
     # training starts from a blank model, initialize the language class.
@@ -247,7 +257,7 @@ def train(
             epoch_model_path = output_path / ("model%d" % i)
             nlp.to_disk(epoch_model_path)
             nlp_loaded = util.load_model_from_path(epoch_model_path)
-            for beam_width in [1, 4, 16, 128]:
+            for beam_width in eval_beam_widths:
                 for name, component in nlp_loaded.pipeline:
                     if hasattr(component, "cfg"):
                         component.cfg["beam_width"] = beam_width
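
With the option in place, the sweep is requested per run rather than
hard-coded. Usage would look something like the following — the `-bw` short
flag is taken from the annotation above, and the positional arguments are the
usual lang/output/train/dev of `spacy train`, shown here with placeholder
paths:

    python -m spacy train en /output train.json dev.json -bw 4,8

If the option is omitted, the list defaults to [1] and only the greedy run is
evaluated; if it is given, width 1 is appended (and the list sorted), so the
greedy baseline is always reported alongside the beam runs.
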
From 7a354761c74b28f5773401f11e5888e4a003d74e Mon Sep 17 00:00:00 2001
From: Ines Montani
Date: Sat, 16 Mar 2019 15:55:13 +0100
Subject: [PATCH 08/11] Auto-format

---
 spacy/cli/train.py | 20 +++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/spacy/cli/train.py b/spacy/cli/train.py
index e1d5cdefc..c74ec8663 100644
--- a/spacy/cli/train.py
+++ b/spacy/cli/train.py
@@ -261,7 +261,9 @@ def train(
                 for name, component in nlp_loaded.pipeline:
                     if hasattr(component, "cfg"):
                         component.cfg["beam_width"] = beam_width
-                dev_docs = list(corpus.dev_docs(nlp_loaded, gold_preproc=gold_preproc))
+                dev_docs = list(
+                    corpus.dev_docs(nlp_loaded, gold_preproc=gold_preproc)
+                )
                 nwords = sum(len(doc_gold[0]) for doc_gold in dev_docs)
                 start_time = timer()
                 scorer = nlp_loaded.evaluate(dev_docs, debug)
@@ -289,19 +291,27 @@ def train(
                 meta["pipeline"] = nlp.pipe_names
                 meta["spacy_version"] = ">=%s" % about.__version__
                 if beam_width == 1:
-                    meta["speed"] = {"nwords": nwords, "cpu": cpu_wps, "gpu": gpu_wps}
+                    meta["speed"] = {
+                        "nwords": nwords,
+                        "cpu": cpu_wps,
+                        "gpu": gpu_wps,
+                    }
                     meta["accuracy"] = scorer.scores
                 else:
                     meta.setdefault("beam_accuracy", {})
                     meta.setdefault("beam_speed", {})
                     meta["beam_accuracy"][beam_width] = scorer.scores
-                    meta["beam_speed"][beam_width] = {"nwords": nwords, "cpu": cpu_wps, "gpu": gpu_wps}
+                    meta["beam_speed"][beam_width] = {
+                        "nwords": nwords,
+                        "cpu": cpu_wps,
+                        "gpu": gpu_wps,
+                    }
                 meta["vectors"] = {
                     "width": nlp.vocab.vectors_length,
                     "vectors": len(nlp.vocab.vectors),
                     "keys": nlp.vocab.vectors.n_keys,
-                    "name": nlp.vocab.vectors.name
-                }
+                    "name": nlp.vocab.vectors.name,
+                }
                 meta.setdefault("name", "model%d" % i)
                 meta.setdefault("version", version)
                 meta_loc = output_path / ("model%d" % i) / "meta.json"

From c94742ff6453a3edb1b4e8a839d6d2bb0e61c717 Mon Sep 17 00:00:00 2001
From: Ines Montani
Date: Sat, 16 Mar 2019 15:55:31 +0100
Subject: [PATCH 09/11] Only add beam width if customised

---
 spacy/cli/train.py | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/spacy/cli/train.py b/spacy/cli/train.py
index c74ec8663..42965edd0 100644
--- a/spacy/cli/train.py
+++ b/spacy/cli/train.py
@@ -143,6 +143,7 @@ def train(
         if 1 not in eval_beam_widths:
             eval_beam_widths.append(1)
         eval_beam_widths.sort()
+    has_beam_widths = eval_beam_widths != [1]
 
     # Set up the base model and pipeline. If a base model is specified, load
     # the model and make sure the pipeline matches the pipeline setting. If
@@ -211,11 +212,11 @@ def train(
 
     # fmt: off
     row_head = ("Itn", "Beam Width", "Dep Loss", "NER Loss", "UAS", "NER P", "NER R", "NER F", "Tag %", "Token %", "CPU WPS", "GPU WPS")
-    row_settings = {
-        "widths": (3, 10, 10, 10, 7, 7, 7, 7, 7, 7, 7, 7),
-        "aligns": tuple(["r" for i in row_head]),
-        "spacing": 2
-    }
+    row_widths = (3, 10, 10, 7, 7, 7, 7, 7, 7, 7, 7)
+    if has_beam_widths:
+        row_head.insert(1, "Beam W.")
+        row_widths.insert(1, 7)
+    row_settings = {"widths": row_widths, "aligns": tuple(["r" for i in row_head]), "spacing": 2}
     # fmt: on
     print("")
     msg.row(row_head, **row_settings)
@@ -318,8 +319,11 @@ def train(
                 srsly.write_json(meta_loc, meta)
                 util.set_env_log(verbose)
 
+                progress_args = [i, losses, scorer.scores]
+                if has_beam_widths:
+                    progress_args.inset(1, beam_with)
                 progress = _get_progress(
-                    i, beam_width, losses, scorer.scores, cpu_wps=cpu_wps, gpu_wps=gpu_wps
+                    *progress_args, cpu_wps=cpu_wps, gpu_wps=gpu_wps
                 )
                 msg.row(progress, **row_settings)
     finally:

From e7aa25d9b1107a07896f34bfef4bb7f0c630a5ad Mon Sep 17 00:00:00 2001
From: Ines Montani
Date: Sat, 16 Mar 2019 16:02:47 +0100
Subject: [PATCH 10/11] Fix beam width integration

---
 spacy/cli/train.py | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/spacy/cli/train.py b/spacy/cli/train.py
index 42965edd0..b97293b16 100644
--- a/spacy/cli/train.py
+++ b/spacy/cli/train.py
@@ -211,7 +211,7 @@ def train(
         msg.text("Loaded pretrained tok2vec for: {}".format(components))
 
     # fmt: off
-    row_head = ("Itn", "Beam Width", "Dep Loss", "NER Loss", "UAS", "NER P", "NER R", "NER F", "Tag %", "Token %", "CPU WPS", "GPU WPS")
+    row_head = ("Itn", "Dep Loss", "NER Loss", "UAS", "NER P", "NER R", "NER F", "Tag %", "Token %", "CPU WPS", "GPU WPS")
     row_widths = (3, 10, 10, 7, 7, 7, 7, 7, 7, 7, 7)
     if has_beam_widths:
         row_head.insert(1, "Beam W.")
@@ -319,11 +319,13 @@ def train(
                 srsly.write_json(meta_loc, meta)
                 util.set_env_log(verbose)
 
-                progress_args = [i, losses, scorer.scores]
-                if has_beam_widths:
-                    progress_args.inset(1, beam_with)
                 progress = _get_progress(
-                    *progress_args, cpu_wps=cpu_wps, gpu_wps=gpu_wps
+                    i,
+                    losses,
+                    scorer.scores,
+                    beam_width=beam_width if has_beam_widths else None,
+                    cpu_wps=cpu_wps,
+                    gpu_wps=gpu_wps,
                 )
                 msg.row(progress, **row_settings)
     finally:
@@ -411,7 +413,7 @@ def _get_metrics(component):
     return ("token_acc",)
 
 
-def _get_progress(itn, beam_width, losses, dev_scores, cpu_wps=0.0, gpu_wps=0.0):
+def _get_progress(itn, losses, dev_scores, beam_width=None, cpu_wps=0.0, gpu_wps=0.0):
     scores = {}
     for col in [
         "dep_loss",
@@ -432,9 +434,8 @@ def _get_progress(itn, beam_width, losses, dev_scores, cpu_wps=0.0, gpu_wps=0.0)
     scores.update(dev_scores)
     scores["cpu_wps"] = cpu_wps
     scores["gpu_wps"] = gpu_wps or 0.0
-    return [
+    result = [
         itn,
-        beam_width,
         "{:.3f}".format(scores["dep_loss"]),
         "{:.3f}".format(scores["ner_loss"]),
         "{:.3f}".format(scores["uas"]),
@@ -446,3 +447,6 @@ def _get_progress(itn, beam_width, losses, dev_scores, cpu_wps=0.0, gpu_wps=0.0)
         "{:.0f}".format(scores["cpu_wps"]),
         "{:.0f}".format(scores["gpu_wps"]),
     ]
+    if beam_width is not None:
+        result.insert(1, beam_width)
+    return result
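
Patch 10 above also disposes of patch 9's `progress_args.inset(1, beam_with)`
— two typos in one line (`inset` for `insert`, `beam_with` for `beam_width`)
that would have raised an AttributeError the moment a custom width was used —
by passing `beam_width` to `_get_progress` as a keyword argument, so the
positional row can no longer be scrambled. One bug survives the fix, though:
`row_head` and `row_widths` are still tuples at this point, and tuples are
immutable:

    # Why the final patch is needed: tuples have no insert() method.
    row_head = ("Itn", "Dep Loss", "NER Loss")
    row_head.insert(1, "Beam W.")  # AttributeError: 'tuple' object has no attribute 'insert'

The last patch fixes this by declaring both as lists.
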
From 0f8739c7cb9c2e2b59766cfb51159e0ddef814d1 Mon Sep 17 00:00:00 2001
From: Ines Montani
Date: Sat, 16 Mar 2019 16:04:15 +0100
Subject: [PATCH 11/11] Update train.py

---
 spacy/cli/train.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/spacy/cli/train.py b/spacy/cli/train.py
index b97293b16..743fec9ea 100644
--- a/spacy/cli/train.py
+++ b/spacy/cli/train.py
@@ -211,8 +211,8 @@ def train(
         msg.text("Loaded pretrained tok2vec for: {}".format(components))
 
     # fmt: off
-    row_head = ("Itn", "Dep Loss", "NER Loss", "UAS", "NER P", "NER R", "NER F", "Tag %", "Token %", "CPU WPS", "GPU WPS")
-    row_widths = (3, 10, 10, 7, 7, 7, 7, 7, 7, 7, 7)
+    row_head = ["Itn", "Dep Loss", "NER Loss", "UAS", "NER P", "NER R", "NER F", "Tag %", "Token %", "CPU WPS", "GPU WPS"]
+    row_widths = [3, 10, 10, 7, 7, 7, 7, 7, 7, 7, 7]
     if has_beam_widths:
         row_head.insert(1, "Beam W.")
         row_widths.insert(1, 7)
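
Assembled from the hunks above, the table set-up that the last three patches
converge on reads:

    # fmt: off
    row_head = ["Itn", "Dep Loss", "NER Loss", "UAS", "NER P", "NER R", "NER F", "Tag %", "Token %", "CPU WPS", "GPU WPS"]
    row_widths = [3, 10, 10, 7, 7, 7, 7, 7, 7, 7, 7]
    if has_beam_widths:
        row_head.insert(1, "Beam W.")
        row_widths.insert(1, 7)
    row_settings = {"widths": row_widths, "aligns": tuple(["r" for i in row_head]), "spacing": 2}
    # fmt: on

Only `row_head` and `row_widths` needed to become lists; `aligns` is built
fresh from `row_head` and never mutated, so it can stay a tuple. The beam
column appears only when widths beyond 1 were requested, and default runs
keep the familiar eleven-column table.
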