From 9df5a429a6708151f1ccb5373dc718a7995d2058 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Sun, 27 Oct 2019 16:34:35 +0100
Subject: [PATCH] Revert "[#4529] fix: gold pyx (#4530)"

This reverts commit fcd25db033976da00af0e765a53df389856cf6a8.
---
 spacy/gold.pyx                           |  8 +++-----
 spacy/tests/regression/test_issue4529.py | 13 -------------
 2 files changed, 3 insertions(+), 18 deletions(-)
 delete mode 100644 spacy/tests/regression/test_issue4529.py

diff --git a/spacy/gold.pyx b/spacy/gold.pyx
index 817b059ce..19a464523 100644
--- a/spacy/gold.pyx
+++ b/spacy/gold.pyx
@@ -647,9 +647,9 @@ cdef class GoldParse:
             if morphology is None:
                 morphology = [None for _ in words]
             if entities is None:
-                entities = ["-" for _ in words]
+                entities = ["-" for _ in doc]
             elif len(entities) == 0:
-                entities = ["O" for _ in words]
+                entities = ["O" for _ in doc]
             else:
                 # Translate the None values to '-', to make processing easier.
                 # See Issue #2603
@@ -712,9 +712,7 @@ cdef class GoldParse:
                             self.heads[i] = i+1
                             self.labels[i] = "subtok"
                         else:
-                            head_i = heads[i2j_multi[i]]
-                            if head_i:
-                                self.heads[i] = self.gold_to_cand[head_i]
+                            self.heads[i] = self.gold_to_cand[heads[i2j_multi[i]]]
                             self.labels[i] = deps[i2j_multi[i]]
                         # Now set NER...This is annoying because if we've split
                         # got an entity word split into two, we need to adjust the
diff --git a/spacy/tests/regression/test_issue4529.py b/spacy/tests/regression/test_issue4529.py
deleted file mode 100644
index 381957be6..000000000
--- a/spacy/tests/regression/test_issue4529.py
+++ /dev/null
@@ -1,13 +0,0 @@
-# coding: utf8
-from __future__ import unicode_literals
-
-import pytest
-from spacy.gold import GoldParse
-
-
-@pytest.mark.parametrize(
-    "text,words", [("A'B C", ["A", "'", "B", "C"]), ("A-B", ["A-B"])]
-)
-def test_gold_misaligned(en_tokenizer, text, words):
-    doc = en_tokenizer(text)
-    GoldParse(doc, words=words)