mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-13 18:56:36 +03:00
* fix: gold pyx
* remove print
* skip test in python2
* Add unicode declarations and don't skip test on Python 2
This commit is contained in:
parent
bddfbc7e1b
commit
fcd25db033
|
@ -636,9 +636,9 @@ cdef class GoldParse:
|
||||||
if morphology is None:
|
if morphology is None:
|
||||||
morphology = [None for _ in words]
|
morphology = [None for _ in words]
|
||||||
if entities is None:
|
if entities is None:
|
||||||
entities = ["-" for _ in doc]
|
entities = ["-" for _ in words]
|
||||||
elif len(entities) == 0:
|
elif len(entities) == 0:
|
||||||
entities = ["O" for _ in doc]
|
entities = ["O" for _ in words]
|
||||||
else:
|
else:
|
||||||
# Translate the None values to '-', to make processing easier.
|
# Translate the None values to '-', to make processing easier.
|
||||||
# See Issue #2603
|
# See Issue #2603
|
||||||
|
@ -701,7 +701,9 @@ cdef class GoldParse:
|
||||||
self.heads[i] = i+1
|
self.heads[i] = i+1
|
||||||
self.labels[i] = "subtok"
|
self.labels[i] = "subtok"
|
||||||
else:
|
else:
|
||||||
self.heads[i] = self.gold_to_cand[heads[i2j_multi[i]]]
|
head_i = heads[i2j_multi[i]]
|
||||||
|
if head_i:
|
||||||
|
self.heads[i] = self.gold_to_cand[head_i]
|
||||||
self.labels[i] = deps[i2j_multi[i]]
|
self.labels[i] = deps[i2j_multi[i]]
|
||||||
# Now set NER...This is annoying because if we've split
|
# Now set NER...This is annoying because if we've split
|
||||||
# got an entity word split into two, we need to adjust the
|
# got an entity word split into two, we need to adjust the
|
||||||
|
|
13
spacy/tests/regression/test_issue4529.py
Normal file
13
spacy/tests/regression/test_issue4529.py
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
# coding: utf8
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
from spacy.gold import GoldParse
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
|
||||||
|
"text,words", [("A'B C", ["A", "'", "B", "C"]), ("A-B", ["A-B"])]
|
||||||
|
)
|
||||||
|
def test_gold_misaligned(en_tokenizer, text, words):
|
||||||
|
doc = en_tokenizer(text)
|
||||||
|
GoldParse(doc, words=words)
|
Loading…
Reference in New Issue
Block a user