From fc6e34c3a13b93caaed7b2c0cf60dcc0df59c0f4 Mon Sep 17 00:00:00 2001
From: svlandeg
Date: Wed, 26 Feb 2020 08:44:22 +0100
Subject: [PATCH] fix bugs from porting master to develop

---
 .../wikidata_train_entity_linker.py |  4 +---
 spacy/cli/train.py                  | 22 +++----------------
 2 files changed, 4 insertions(+), 22 deletions(-)

diff --git a/bin/wiki_entity_linking/wikidata_train_entity_linker.py b/bin/wiki_entity_linking/wikidata_train_entity_linker.py
index 386af7d4d..af0e68768 100644
--- a/bin/wiki_entity_linking/wikidata_train_entity_linker.py
+++ b/bin/wiki_entity_linking/wikidata_train_entity_linker.py
@@ -175,12 +175,10 @@ def main(
                 kb=kb,
                 labels_discard=labels_discard,
             )
-            docs, golds = zip(*train_batch)
             try:
                 with nlp.disable_pipes(*other_pipes):
                     nlp.update(
-                        docs=docs,
-                        golds=golds,
+                        examples=train_batch,
                         sgd=optimizer,
                         drop=dropout,
                         losses=losses,
diff --git a/spacy/cli/train.py b/spacy/cli/train.py
index d8514095b..92f94b53d 100644
--- a/spacy/cli/train.py
+++ b/spacy/cli/train.py
@@ -28,13 +28,6 @@ def train(
     pipeline: ("Comma-separated names of pipeline components", "option", "p", str) = "tagger,parser,ner",
     vectors: ("Model to load vectors from", "option", "v", str) = None,
     replace_components: ("Replace components from base model", "flag", "R", bool) = False,
-    width: ("Width of CNN layers of Tok2Vec component", "option", "cw", int) = 96,
-    conv_depth: ("Depth of CNN layers of Tok2Vec component", "option", "cd", int) = 4,
-    cnn_window: ("Window size for CNN layers of Tok2Vec component", "option", "cW", int) = 1,
-    cnn_pieces: ("Maxout size for CNN layers of Tok2Vec component. 1 for Mish", "option", "cP", int) = 3,
-    use_chars: ("Whether to use character-based embedding of Tok2Vec component", "flag", "chr", bool) = False,
-    bilstm_depth: ("Depth of BiLSTM layers of Tok2Vec component (requires PyTorch)", "option", "lstm", int) = 0,
-    embed_rows: ("Number of embedding rows of Tok2Vec component", "option", "er", int) = 2000,
     n_iter: ("Number of iterations", "option", "n", int) = 30,
     n_early_stopping: ("Maximum number of training epochs without dev accuracy improvement", "option", "ne", int) = None,
     n_examples: ("Number of examples", "option", "ns", int) = 0,
@@ -232,14 +225,7 @@ def train(
     else:
         # Start with a blank model, call begin_training
         cfg = {"device": use_gpu}
-        cfg["conv_depth"] = conv_depth
-        cfg["token_vector_width"] = width
-        cfg["bilstm_depth"] = bilstm_depth
-        cfg["cnn_maxout_pieces"] = cnn_pieces
-        cfg["embed_size"] = embed_rows
-        cfg["conv_window"] = cnn_window
-        cfg["subword_features"] = not use_chars
-        optimizer = nlp.begin_training(lambda: corpus.train_tuples, **cfg)
+        optimizer = nlp.begin_training(lambda: corpus.train_examples, **cfg)
     nlp._optimizer = None

     # Load in pretrained weights
@@ -362,11 +348,9 @@ def train(
            for batch in util.minibatch_by_words(train_data, size=batch_sizes):
                if not batch:
                    continue
-               docs, golds = zip(*batch)
                try:
                    nlp.update(
-                       docs,
-                       golds,
+                       batch,
                        sgd=optimizer,
                        drop=next(dropout_rates),
                        losses=losses,
@@ -609,7 +593,7 @@ def _get_metrics(component):
     elif component == "tagger":
         return ("tags_acc",)
     elif component == "ner":
-        return ("ents_f", "ents_p", "ents_r", "enty_per_type")
+        return ("ents_f", "ents_p", "ents_r", "ents_per_type")
     elif component == "sentrec":
         return ("sent_f", "sent_p", "sent_r")
     elif component == "textcat":
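
The first three hunks are the same fix: on develop, `Language.update` and `Language.begin_training` consume example objects directly, so the `docs, golds = zip(*batch)` unpacking inherited from master has to go, and the corpus object exposes `train_examples` rather than `train_tuples`. The last hunk independently corrects the NER per-type metric key from `enty_per_type` to `ents_per_type`. A minimal sketch of the new update convention, assuming this develop snapshot still converts `(text, annotations)` tuples to example objects internally; the pipeline, label, and training pair below are illustrative, not taken from the patch:

    import spacy

    nlp = spacy.blank("en")
    ner = nlp.create_pipe("ner")
    ner.add_label("ORG")  # hypothetical label, for illustration only
    nlp.add_pipe(ner)

    optimizer = nlp.begin_training()
    losses = {}

    # develop convention: hand the whole batch to update() as `examples`,
    # rather than zipping it into parallel docs/golds sequences
    train_batch = [
        ("spaCy is made by Explosion.", {"entities": [(17, 26, "ORG")]}),
    ]
    nlp.update(examples=train_batch, sgd=optimizer, drop=0.2, losses=losses)
    print(losses)  # e.g. {"ner": ...}

Because the batch is passed through unmodified, any per-example bookkeeping (such as the error handling around `nlp.update` in both loops) now sees the original example objects instead of two parallel sequences.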