mirror of
https://github.com/explosion/spaCy.git
synced 2025-10-02 18:06:46 +03:00
Add more foolproof logging
This commit is contained in:
parent
cb87d43b00
commit
9964dad76e
|
@ -109,7 +109,24 @@ def projectivize(heads, labels):
|
||||||
return proj_heads, deco_labels
|
return proj_heads, deco_labels
|
||||||
|
|
||||||
|
|
||||||
|
# Directory that receives the per-parse JSON dumps written by deprojectivize.
LOG_DIR = Path("/tmp/nonproj_log")
# Create the directory in one call. exist_ok=True avoids the window between a
# separate exists() check and mkdir() in which another process could create
# the directory and make mkdir() raise FileExistsError at import time.
LOG_DIR.mkdir(parents=True, exist_ok=True)
# Running counter used to name the next log file ("<file_num>.json").
file_num = 0
|
||||||
cpdef deprojectivize(Doc doc):
|
cpdef deprojectivize(Doc doc):
|
||||||
|
global file_num
|
||||||
|
# Log the parse
|
||||||
|
heads = []
|
||||||
|
labels = []
|
||||||
|
for i in range(doc.length):
|
||||||
|
heads.append(doc.c[i].head)
|
||||||
|
labels.append(doc.vocab.strings[doc.c[i].dep])
|
||||||
|
texts = [w.text for w in doc]
|
||||||
|
indices = list(range(len(doc)))
|
||||||
|
labels = [w.dep_ for w in token.doc]
|
||||||
|
with (LOG_DIR / f"{file_num}.json").open("w") as file_:
|
||||||
|
file_.write(json.dumps(list(zip(indices, texts, heads, labels)), indent=2))
|
||||||
|
file_num += 1
|
||||||
# Reattach arcs with decorated labels (following HEAD scheme). For each
|
# Reattach arcs with decorated labels (following HEAD scheme). For each
|
||||||
# decorated arc X||Y, search top-down, left-to-right, breadth-first until
|
# decorated arc X||Y, search top-down, left-to-right, breadth-first until
|
||||||
# hitting a Y then make this the new head.
|
# hitting a Y then make this the new head.
|
||||||
|
@ -168,13 +185,9 @@ def _find_new_head(token, headlabel):
|
||||||
queue = [token.head]
|
queue = [token.head]
|
||||||
n_iter = 0
|
n_iter = 0
|
||||||
headlabel = token.vocab.strings.as_int(headlabel)
|
headlabel = token.vocab.strings.as_int(headlabel)
|
||||||
heads = token.doc.to_array(["HEAD"]).astype("int64")
|
|
||||||
labels = [w.dep_ for w in token.doc]
|
|
||||||
while queue:
|
while queue:
|
||||||
n_iter += 1
|
n_iter += 1
|
||||||
if n_iter >= len(token.doc):
|
if n_iter >= len(token.doc):
|
||||||
texts = [w.text for w in token.doc]
|
|
||||||
print(json.dumps(list(zip(range(len(token.doc)), texts, heads, labels)), indent=2))
|
|
||||||
raise ValueError("Infinite loop?")
|
raise ValueError("Infinite loop?")
|
||||||
next_queue = []
|
next_queue = []
|
||||||
for qtoken in queue:
|
for qtoken in queue:
|
||||||
|
|
Loading…
Reference in New Issue
Block a user