diff --git a/spacy/ml/models/coref_util.py b/spacy/ml/models/coref_util.py
index bd577e65f..9281ad0c7 100644
--- a/spacy/ml/models/coref_util.py
+++ b/spacy/ml/models/coref_util.py
@@ -143,7 +143,7 @@ def create_head_span_idxs(ops, doclen: int):
 
 
 def get_clusters_from_doc(doc) -> List[List[Tuple[int, int]]]:
-    """Given a Doc, convert the cluster spans to simple int tuple lists. The 
+    """Given a Doc, convert the cluster spans to simple int tuple lists. The
     ints are char spans, to be tokenization independent.
     """
     out = []
@@ -203,3 +203,15 @@ def create_gold_scores(
 
     # caller needs to convert to array, and add placeholder
     return out
+
+
+def spans2ints(doc):
+    """Convert doc.spans to nested list of ints for comparison.
+    The ints are token indices.
+
+    This is useful for checking consistency of predictions.
+    """
+    out = []
+    for key, cluster in doc.spans.items():
+        out.append([(ss.start, ss.end) for ss in cluster])
+    return out
diff --git a/spacy/tests/pipeline/test_coref.py b/spacy/tests/pipeline/test_coref.py
index 4b8ca1653..3bde6ad34 100644
--- a/spacy/tests/pipeline/test_coref.py
+++ b/spacy/tests/pipeline/test_coref.py
@@ -9,6 +9,7 @@ from spacy.ml.models.coref_util import (
     DEFAULT_CLUSTER_PREFIX,
     select_non_crossing_spans,
     get_sentence_ids,
+    spans2ints,
 )
 
 from thinc.util import has_torch
@@ -35,16 +36,6 @@ TRAIN_DATA = [
 # fmt: on
 
 
-def spans2ints(doc):
-    """Convert doc.spans to nested list of ints for comparison.
-    The ints are token indices.
-
-    This is useful for checking consistency of predictions.
-    """
-    out = []
-    for key, cluster in doc.spans.items():
-        out.append([(ss.start, ss.end) for ss in cluster])
-    return out
 
 
 @pytest.fixture