diff --git a/spacy/errors.py b/spacy/errors.py index 9a8cf63c9..96e84ef8d 100644 --- a/spacy/errors.py +++ b/spacy/errors.py @@ -972,6 +972,8 @@ class Errors(metaclass=ErrorsWithCodes): E1051 = ("'allow_overlap' can only be False when max_positive is 1, but found 'max_positive': {max_positive}.") E1052 = ("Both 'min_length' and 'max_length' should be larger than 1, but found" " 'min_length': {min_length}, 'max_length': {max_length}") + E1053 = ("Text, including whitespace, must match between reference and " + "predicted docs when training {component}.") # Deprecated model shortcuts, only used in errors and warnings diff --git a/spacy/pipeline/span_finder.py b/spacy/pipeline/span_finder.py index 4aa526c5a..258781be0 100644 --- a/spacy/pipeline/span_finder.py +++ b/spacy/pipeline/span_finder.py @@ -294,6 +294,8 @@ class SpanFinder(TrainablePipe): reference_truths = [] for eg in examples: + if eg.x.text != eg.y.text: + raise ValueError(Errors.E1053.format(component="span_finder")) start_indices = set() end_indices = set()