mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-24 17:06:29 +03:00
Provide more info in cycle error message E069 (#4123)
Provide the tokens in the cycle and the first 50 tokens from the document in the error message so it's easier to track down the location of the cycle in the data. Addresses the feature request in #3698.
This commit is contained in:
parent
2f3648700c
commit
2f9b28c218
|
@ -243,7 +243,8 @@ class Errors(object):
|
|||
"Tag sequence:\n{tags}")
|
||||
E068 = ("Invalid BILUO tag: '{tag}'.")
|
||||
E069 = ("Invalid gold-standard parse tree. Found cycle between word "
|
||||
"IDs: {cycle}")
|
||||
"IDs: {cycle} (tokens: {cycle_tokens}) in the document starting "
|
||||
"with tokens: {doc_tokens}.")
|
||||
E070 = ("Invalid gold-standard data. Number of documents ({n_docs}) "
|
||||
"does not align with number of annotations ({n_annots}).")
|
||||
E071 = ("Error creating lexeme: specified orth ID ({orth}) does not "
|
||||
|
|
|
@ -590,7 +590,7 @@ cdef class GoldParse:
|
|||
|
||||
cycle = nonproj.contains_cycle(self.heads)
|
||||
if cycle is not None:
|
||||
raise ValueError(Errors.E069.format(cycle=cycle))
|
||||
raise ValueError(Errors.E069.format(cycle=cycle, cycle_tokens=" ".join(["'{}'".format(self.words[tok_id]) for tok_id in cycle]), doc_tokens=" ".join(words[:50])))
|
||||
|
||||
def __len__(self):
|
||||
"""Get the number of gold-standard tokens.
|
||||
|
|
Loading…
Reference in New Issue
Block a user