mirror of
https://github.com/explosion/spaCy.git
synced 2025-08-10 15:14:56 +03:00
Prepare ArcEager.preprocess_gold to handle subtokens
This commit is contained in:
parent
2d929ffc5d
commit
b8461e71b7
|
@@ -474,16 +474,21 @@ cdef class ArcEager(TransitionSystem):
|
||||||
def preprocess_gold(self, GoldParse gold):
|
def preprocess_gold(self, GoldParse gold):
|
||||||
if not self.has_gold(gold):
|
if not self.has_gold(gold):
|
||||||
return None
|
return None
|
||||||
for i, (head, dep) in enumerate(zip(gold.heads, gold.labels)):
|
for i, (head_group, dep_group) in enumerate(zip(gold.heads, gold.labels)):
|
||||||
# Missing values
|
# Missing values
|
||||||
if head is None or dep is None:
|
if head_group is None or dep_group is None:
|
||||||
gold.c.heads[i] = i
|
gold.c.heads[i] = i
|
||||||
gold.c.has_dep[i] = False
|
gold.c.has_dep[i] = False
|
||||||
elif isinstance(head, list):
|
continue
|
||||||
# TODO: This is where the fused token stuff will happen
|
if not isinstance(head_group, list):
|
||||||
gold.c.heads[i] = i
|
# Map the simple format into the elaborate one we need for
|
||||||
gold.c.has_dep[i] = False
|
# the fused tokens.
|
||||||
else:
|
head_group = [(head_group, 0)]
|
||||||
|
dep_group = [dep_group]
|
||||||
|
for head_addr, dep in zip(head_group, dep_group):
|
||||||
|
if not isinstance(head_addr, tuple):
|
||||||
|
head_addr = (head_addr, 0)
|
||||||
|
head, subtoken = head_addr
|
||||||
if head > i:
|
if head > i:
|
||||||
action = LEFT
|
action = LEFT
|
||||||
elif head < i:
|
elif head < i:
|
||||||
|
|
Loading…
Reference in New Issue
Block a user