Revert "[#4529] fix: gold pyx (#4530)"

This reverts commit fcd25db033.
This commit is contained in:
Matthew Honnibal 2019-10-27 16:34:35 +01:00 committed by GitHub
parent 8e7414dace
commit 9df5a429a6
2 changed files with 3 additions and 18 deletions

View File

@ -647,9 +647,9 @@ cdef class GoldParse:
if morphology is None: if morphology is None:
morphology = [None for _ in words] morphology = [None for _ in words]
if entities is None: if entities is None:
entities = ["-" for _ in words] entities = ["-" for _ in doc]
elif len(entities) == 0: elif len(entities) == 0:
entities = ["O" for _ in words] entities = ["O" for _ in doc]
else: else:
# Translate the None values to '-', to make processing easier. # Translate the None values to '-', to make processing easier.
# See Issue #2603 # See Issue #2603
@ -712,9 +712,7 @@ cdef class GoldParse:
self.heads[i] = i+1 self.heads[i] = i+1
self.labels[i] = "subtok" self.labels[i] = "subtok"
else: else:
head_i = heads[i2j_multi[i]] self.heads[i] = self.gold_to_cand[heads[i2j_multi[i]]]
if head_i:
self.heads[i] = self.gold_to_cand[head_i]
self.labels[i] = deps[i2j_multi[i]] self.labels[i] = deps[i2j_multi[i]]
# Now set NER...This is annoying because if we've split # Now set NER...This is annoying because if we've split
# got an entity word split into two, we need to adjust the # got an entity word split into two, we need to adjust the

View File

@ -1,13 +0,0 @@
# coding: utf8
from __future__ import unicode_literals
import pytest
from spacy.gold import GoldParse
@pytest.mark.parametrize(
    "text,words",
    [
        ("A'B C", ["A", "'", "B", "C"]),
        ("A-B", ["A-B"]),
    ],
)
def test_gold_misaligned(en_tokenizer, text, words):
    """Build a GoldParse from a tokenized text and an explicit gold word list.

    The test has no assertions: it passes as long as constructing the
    GoldParse does not raise.
    NOTE(review): judging by the test name, the gold ``words`` are presumably
    meant to differ from the tokenizer's own segmentation of ``text`` --
    confirm against the tokenizer's actual output.
    """
    tokenized = en_tokenizer(text)
    GoldParse(tokenized, words=words)