mirror of
https://github.com/explosion/spaCy.git
synced 2025-08-10 15:14:56 +03:00
Prepare ArcEager.preprocess_gold to handle subtokens
This commit is contained in:
parent
b8461e71b7
commit
5f68e491e1
|
@ -475,11 +475,11 @@ cdef class ArcEager(TransitionSystem):
|
||||||
if not self.has_gold(gold):
|
if not self.has_gold(gold):
|
||||||
return None
|
return None
|
||||||
for i, (head_group, dep_group) in enumerate(zip(gold.heads, gold.labels)):
|
for i, (head_group, dep_group) in enumerate(zip(gold.heads, gold.labels)):
|
||||||
|
if not USE_SPLIT:
|
||||||
|
if isinstance(head_group, list):
|
||||||
|
head_group = [(None, 0)]
|
||||||
|
dep_group = [None]
|
||||||
# Missing values
|
# Missing values
|
||||||
if head_group is None or dep_group is None:
|
|
||||||
gold.c.heads[i] = i
|
|
||||||
gold.c.has_dep[i] = False
|
|
||||||
continue
|
|
||||||
if not isinstance(head_group, list):
|
if not isinstance(head_group, list):
|
||||||
# Map the simple format into the elaborate one we need for
|
# Map the simple format into the elaborate one we need for
|
||||||
# the fused tokens.
|
# the fused tokens.
|
||||||
|
@ -489,6 +489,10 @@ cdef class ArcEager(TransitionSystem):
|
||||||
if not isinstance(head_addr, tuple):
|
if not isinstance(head_addr, tuple):
|
||||||
head_addr = (head_addr, 0)
|
head_addr = (head_addr, 0)
|
||||||
head, subtoken = head_addr
|
head, subtoken = head_addr
|
||||||
|
if head is None or dep is None:
|
||||||
|
gold.c.heads[i] = i
|
||||||
|
gold.c.has_dep[i] = False
|
||||||
|
continue
|
||||||
if head > i:
|
if head > i:
|
||||||
action = LEFT
|
action = LEFT
|
||||||
elif head < i:
|
elif head < i:
|
||||||
|
@ -665,6 +669,8 @@ cdef class ArcEager(TransitionSystem):
|
||||||
# Check label set --- leading cause
|
# Check label set --- leading cause
|
||||||
label_set = set([self.strings[self.c[i].label] for i in range(self.n_moves)])
|
label_set = set([self.strings[self.c[i].label] for i in range(self.n_moves)])
|
||||||
for label_str in gold.labels:
|
for label_str in gold.labels:
|
||||||
|
if isinstance(label_str, list):
|
||||||
|
continue
|
||||||
if label_str is not None and label_str not in label_set:
|
if label_str is not None and label_str not in label_set:
|
||||||
raise ValueError("Cannot get gold parser action: unknown label: %s" % label_str)
|
raise ValueError("Cannot get gold parser action: unknown label: %s" % label_str)
|
||||||
# Check projectivity --- other leading cause
|
# Check projectivity --- other leading cause
|
||||||
|
|
Loading…
Reference in New Issue
Block a user