mirror of
https://github.com/explosion/spaCy.git
synced 2025-10-03 02:17:00 +03:00
Add logging for infinite loop
This commit is contained in:
parent
d123e1ec8f
commit
605f0618c4
|
@ -4,6 +4,7 @@ for doing pseudo-projective parsing implementation uses the HEAD decoration
|
||||||
scheme.
|
scheme.
|
||||||
"""
|
"""
|
||||||
from copy import copy
|
from copy import copy
|
||||||
|
import json
|
||||||
|
|
||||||
from ...tokens.doc cimport Doc, set_children_from_heads
|
from ...tokens.doc cimport Doc, set_children_from_heads
|
||||||
|
|
||||||
|
@ -112,13 +113,18 @@ cpdef deprojectivize(Doc doc):
|
||||||
# Reattach arcs with decorated labels (following HEAD scheme). For each
|
# Reattach arcs with decorated labels (following HEAD scheme). For each
|
||||||
# decorated arc X||Y, search top-down, left-to-right, breadth-first until
|
# decorated arc X||Y, search top-down, left-to-right, breadth-first until
|
||||||
# hitting a Y then make this the new head.
|
# hitting a Y then make this the new head.
|
||||||
|
new_heads = []
|
||||||
|
new_labels = []
|
||||||
for i in range(doc.length):
|
for i in range(doc.length):
|
||||||
label = doc.vocab.strings[doc.c[i].dep]
|
label = doc.vocab.strings[doc.c[i].dep]
|
||||||
if DELIMITER in label:
|
if DELIMITER in label:
|
||||||
new_label, head_label = label.split(DELIMITER)
|
new_label, head_label = label.split(DELIMITER)
|
||||||
new_head = _find_new_head(doc[i], head_label)
|
new_head = _find_new_head(doc[i], head_label)
|
||||||
doc.c[i].head = new_head.i - i
|
new_heads.append(new_head.i - i)
|
||||||
doc.c[i].dep = doc.vocab.strings.add(new_label)
|
new_labels.append(doc.vocab.strings.add(new_label))
|
||||||
|
for i, (head, dep) in enumerate(zip(new_heads, new_labels)):
|
||||||
|
doc.c[i].head = head
|
||||||
|
doc.c[i].dep = dep
|
||||||
set_children_from_heads(doc.c, 0, doc.length)
|
set_children_from_heads(doc.c, 0, doc.length)
|
||||||
return doc
|
return doc
|
||||||
|
|
||||||
|
@ -166,13 +172,15 @@ def _find_new_head(token, headlabel):
|
||||||
# if there is none, return the current head (no change)
|
# if there is none, return the current head (no change)
|
||||||
queue = [token.head]
|
queue = [token.head]
|
||||||
n_iter = 0
|
n_iter = 0
|
||||||
|
headlabel = token.vocab.strings.as_int(headlabel)
|
||||||
|
heads = token.doc.to_array(["HEAD"]).astype("int64")
|
||||||
|
labels = [w.dep_ for w in token.doc]
|
||||||
while queue:
|
while queue:
|
||||||
n_iter += 1
|
n_iter += 1
|
||||||
if n_iter >= len(token.doc):
|
if n_iter >= len(token.doc):
|
||||||
print("Infinite loop")
|
texts = [w.text for w in token.doc]
|
||||||
print([(w.i, w.text, w.head.i, w.dep_) for w in token.doc])
|
print(json.dumps(list(zip(range(len(token.doc)), texts, heads, labels)), indent=2))
|
||||||
with open("/tmp/doc.bytes", "wb") as file_:
|
raise ValueError("Infinite loop?")
|
||||||
file_.write(token.doc.to_bytes())
|
|
||||||
next_queue = []
|
next_queue = []
|
||||||
for qtoken in queue:
|
for qtoken in queue:
|
||||||
for child in qtoken.children:
|
for child in qtoken.children:
|
||||||
|
@ -180,7 +188,7 @@ def _find_new_head(token, headlabel):
|
||||||
continue
|
continue
|
||||||
if child == token:
|
if child == token:
|
||||||
continue
|
continue
|
||||||
if child.dep_ == headlabel:
|
if child.dep == headlabel:
|
||||||
return child
|
return child
|
||||||
next_queue.append(child)
|
next_queue.append(child)
|
||||||
queue = next_queue
|
queue = next_queue
|
||||||
|
|
Loading…
Reference in New Issue
Block a user