Prepare ArcEager.preprocess_gold to handle subtokens

This commit is contained in:
Matthew Honnibal 2018-04-01 18:31:33 +02:00
parent b8461e71b7
commit 5f68e491e1

View File

@@ -475,11 +475,11 @@ cdef class ArcEager(TransitionSystem):
if not self.has_gold(gold): if not self.has_gold(gold):
return None return None
for i, (head_group, dep_group) in enumerate(zip(gold.heads, gold.labels)): for i, (head_group, dep_group) in enumerate(zip(gold.heads, gold.labels)):
if not USE_SPLIT:
if isinstance(head_group, list):
head_group = [(None, 0)]
dep_group = [None]
# Missing values # Missing values
if head_group is None or dep_group is None:
gold.c.heads[i] = i
gold.c.has_dep[i] = False
continue
if not isinstance(head_group, list): if not isinstance(head_group, list):
# Map the simple format into the elaborate one we need for # Map the simple format into the elaborate one we need for
# the fused tokens. # the fused tokens.
@@ -489,6 +489,10 @@ cdef class ArcEager(TransitionSystem):
if not isinstance(head_addr, tuple): if not isinstance(head_addr, tuple):
head_addr = (head_addr, 0) head_addr = (head_addr, 0)
head, subtoken = head_addr head, subtoken = head_addr
if head is None or dep is None:
gold.c.heads[i] = i
gold.c.has_dep[i] = False
continue
if head > i: if head > i:
action = LEFT action = LEFT
elif head < i: elif head < i:
@@ -665,6 +669,8 @@ cdef class ArcEager(TransitionSystem):
# Check label set --- leading cause # Check label set --- leading cause
label_set = set([self.strings[self.c[i].label] for i in range(self.n_moves)]) label_set = set([self.strings[self.c[i].label] for i in range(self.n_moves)])
for label_str in gold.labels: for label_str in gold.labels:
if isinstance(label_str, list):
continue
if label_str is not None and label_str not in label_set: if label_str is not None and label_str not in label_set:
raise ValueError("Cannot get gold parser action: unknown label: %s" % label_str) raise ValueError("Cannot get gold parser action: unknown label: %s" % label_str)
# Check projectivity --- other leading cause # Check projectivity --- other leading cause