mirror of
https://github.com/explosion/spaCy.git
synced 2025-10-02 09:56:39 +03:00
parent
8e7414dace
commit
9df5a429a6
|
@ -647,9 +647,9 @@ cdef class GoldParse:
|
||||||
if morphology is None:
|
if morphology is None:
|
||||||
morphology = [None for _ in words]
|
morphology = [None for _ in words]
|
||||||
if entities is None:
|
if entities is None:
|
||||||
entities = ["-" for _ in words]
|
entities = ["-" for _ in doc]
|
||||||
elif len(entities) == 0:
|
elif len(entities) == 0:
|
||||||
entities = ["O" for _ in words]
|
entities = ["O" for _ in doc]
|
||||||
else:
|
else:
|
||||||
# Translate the None values to '-', to make processing easier.
|
# Translate the None values to '-', to make processing easier.
|
||||||
# See Issue #2603
|
# See Issue #2603
|
||||||
|
@ -712,9 +712,7 @@ cdef class GoldParse:
|
||||||
self.heads[i] = i+1
|
self.heads[i] = i+1
|
||||||
self.labels[i] = "subtok"
|
self.labels[i] = "subtok"
|
||||||
else:
|
else:
|
||||||
head_i = heads[i2j_multi[i]]
|
self.heads[i] = self.gold_to_cand[heads[i2j_multi[i]]]
|
||||||
if head_i:
|
|
||||||
self.heads[i] = self.gold_to_cand[head_i]
|
|
||||||
self.labels[i] = deps[i2j_multi[i]]
|
self.labels[i] = deps[i2j_multi[i]]
|
||||||
# Now set NER...This is annoying because if we've split
|
# Now set NER...This is annoying because if we've split
|
||||||
# got an entity word split into two, we need to adjust the
|
# got an entity word split into two, we need to adjust the
|
||||||
|
|
|
@ -1,13 +0,0 @@
|
||||||
# coding: utf8
|
|
||||||
from __future__ import unicode_literals
|
|
||||||
|
|
||||||
import pytest
|
|
||||||
from spacy.gold import GoldParse
|
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize(
|
|
||||||
"text,words", [("A'B C", ["A", "'", "B", "C"]), ("A-B", ["A-B"])]
|
|
||||||
)
|
|
||||||
def test_gold_misaligned(en_tokenizer, text, words):
|
|
||||||
doc = en_tokenizer(text)
|
|
||||||
GoldParse(doc, words=words)
|
|
Loading…
Reference in New Issue
Block a user