mirror of
https://github.com/explosion/spaCy.git
synced 2025-06-06 06:03:11 +03:00
Add hack to deprojectivize to handle cycles
This commit is contained in:
parent
4fa967ea84
commit
4a38a698d5
|
@ -112,12 +112,21 @@ cpdef deprojectivize(Doc doc):
|
||||||
# Reattach arcs with decorated labels (following HEAD scheme). For each
|
# Reattach arcs with decorated labels (following HEAD scheme). For each
|
||||||
# decorated arc X||Y, search top-down, left-to-right, breadth-first until
|
# decorated arc X||Y, search top-down, left-to-right, breadth-first until
|
||||||
# hitting a Y then make this the new head.
|
# hitting a Y then make this the new head.
|
||||||
|
orig_heads = [t.head.i for t in doc]
|
||||||
|
orig_deps = [t.dep_ for t in doc]
|
||||||
for i in range(doc.length):
|
for i in range(doc.length):
|
||||||
label = doc.vocab.strings[doc.c[i].dep]
|
label = doc.vocab.strings[doc.c[i].dep]
|
||||||
if DELIMITER in label:
|
if DELIMITER in label:
|
||||||
new_label, head_label = label.split(DELIMITER)
|
new_label, head_label = label.split(DELIMITER)
|
||||||
new_head = _find_new_head(doc[i], head_label)
|
new_head = _find_new_head(doc[i], head_label)
|
||||||
doc.c[i].head = new_head.i - i
|
if new_head is not None:
|
||||||
|
doc.c[i].head = new_head.i - i
|
||||||
|
else:
|
||||||
|
print(i, doc.text, [t.text for t in doc], orig_heads, orig_deps)
|
||||||
|
if i == 0:
|
||||||
|
doc.c[i].head = 0
|
||||||
|
else:
|
||||||
|
doc.c[i].head = -1
|
||||||
doc.c[i].dep = doc.vocab.strings.add(new_label)
|
doc.c[i].dep = doc.vocab.strings.add(new_label)
|
||||||
set_children_from_heads(doc.c, 0, doc.length)
|
set_children_from_heads(doc.c, 0, doc.length)
|
||||||
return doc
|
return doc
|
||||||
|
@ -165,7 +174,11 @@ def _find_new_head(token, headlabel):
|
||||||
# returns the id of the first descendant with the given label
|
# returns the id of the first descendant with the given label
|
||||||
# if there is none, return the current head (no change)
|
# if there is none, return the current head (no change)
|
||||||
queue = [token.head]
|
queue = [token.head]
|
||||||
|
n_iter = 0
|
||||||
while queue:
|
while queue:
|
||||||
|
if n_iter > len(token.doc):
|
||||||
|
return None
|
||||||
|
n_iter += 1
|
||||||
next_queue = []
|
next_queue = []
|
||||||
for qtoken in queue:
|
for qtoken in queue:
|
||||||
for child in qtoken.children:
|
for child in qtoken.children:
|
||||||
|
|
Loading…
Reference in New Issue
Block a user