enforce that the gold and predicted documents have the same text

This commit is contained in:
kadarakos 2023-05-08 11:18:06 +00:00
parent 9d6793604e
commit d5da2df4c9
2 changed files with 4 additions and 0 deletions

View File

@@ -972,6 +972,8 @@ class Errors(metaclass=ErrorsWithCodes):
E1051 = ("'allow_overlap' can only be False when max_positive is 1, but found 'max_positive': {max_positive}.")
E1052 = ("Both 'min_length' and 'max_length' should be larger than 1, but found"
" 'min_length': {min_length}, 'max_length': {max_length}")
E1053 = ("Text, including whitespace, must match between reference and "
"predicted docs when training {component}.")
# Deprecated model shortcuts, only used in errors and warnings

View File

@@ -294,6 +294,8 @@ class SpanFinder(TrainablePipe):
reference_truths = []
for eg in examples:
if eg.x.text != eg.y.text:
raise ValueError(Errors.E1053.format(component="span_finder"))
start_indices = set()
end_indices = set()