mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 01:46:28 +03:00
* fix: gold pyx * remove print * skip test in python2 * Add unicode declarations and don't skip test on Python 2
This commit is contained in:
parent
bddfbc7e1b
commit
fcd25db033
|
@ -636,9 +636,9 @@ cdef class GoldParse:
|
|||
if morphology is None:
|
||||
morphology = [None for _ in words]
|
||||
if entities is None:
|
||||
entities = ["-" for _ in doc]
|
||||
entities = ["-" for _ in words]
|
||||
elif len(entities) == 0:
|
||||
entities = ["O" for _ in doc]
|
||||
entities = ["O" for _ in words]
|
||||
else:
|
||||
# Translate the None values to '-', to make processing easier.
|
||||
# See Issue #2603
|
||||
|
@ -701,7 +701,9 @@ cdef class GoldParse:
|
|||
self.heads[i] = i+1
|
||||
self.labels[i] = "subtok"
|
||||
else:
|
||||
self.heads[i] = self.gold_to_cand[heads[i2j_multi[i]]]
|
||||
head_i = heads[i2j_multi[i]]
|
||||
if head_i:
|
||||
self.heads[i] = self.gold_to_cand[head_i]
|
||||
self.labels[i] = deps[i2j_multi[i]]
|
||||
# Now set NER...This is annoying because if we've split
|
||||
# got an entity word split into two, we need to adjust the
|
||||
|
|
13
spacy/tests/regression/test_issue4529.py
Normal file
13
spacy/tests/regression/test_issue4529.py
Normal file
|
@ -0,0 +1,13 @@
|
|||
# coding: utf8
|
||||
from __future__ import unicode_literals
|
||||
|
||||
import pytest
|
||||
from spacy.gold import GoldParse
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"text,words", [("A'B C", ["A", "'", "B", "C"]), ("A-B", ["A-B"])]
|
||||
)
|
||||
def test_gold_misaligned(en_tokenizer, text, words):
|
||||
doc = en_tokenizer(text)
|
||||
GoldParse(doc, words=words)
|
Loading…
Reference in New Issue
Block a user