Move spans2ints to util

This commit is contained in:
Paul O'Leary McCann 2022-07-03 15:12:53 +09:00
parent 1dacecbbfb
commit 201731df2d
2 changed files with 14 additions and 11 deletions

View File

@ -143,7 +143,7 @@ def create_head_span_idxs(ops, doclen: int):
def get_clusters_from_doc(doc) -> List[List[Tuple[int, int]]]:
"""Given a Doc, convert the cluster spans to simple int tuple lists. The
"""Given a Doc, convert the cluster spans to simple int tuple lists. The
ints are char spans, to be tokenization independent.
"""
out = []
@ -203,3 +203,15 @@ def create_gold_scores(
# caller needs to convert to array, and add placeholder
return out
def spans2ints(doc) -> List[List[Tuple[int, int]]]:
    """Convert doc.spans to nested list of ints for comparison.

    The ints are token indices.
    This is useful for checking consistency of predictions.

    doc: an object whose ``spans`` attribute is a mapping from a key to an
        iterable of spans, each span exposing ``start`` and ``end``.
    RETURNS: one list per entry of ``doc.spans``, in the mapping's iteration
        order, each holding a ``(start, end)`` tuple for every span in that
        entry. The keys themselves are not part of the output.
    """
    # Only the span groups are needed, so iterate the values directly rather
    # than unpacking (and then ignoring) the keys.
    return [
        [(span.start, span.end) for span in cluster]
        for cluster in doc.spans.values()
    ]

View File

@ -9,6 +9,7 @@ from spacy.ml.models.coref_util import (
DEFAULT_CLUSTER_PREFIX,
select_non_crossing_spans,
get_sentence_ids,
spans2ints,
)
from thinc.util import has_torch
@ -35,16 +36,6 @@ TRAIN_DATA = [
# fmt: on
def spans2ints(doc):
    """Convert doc.spans to nested list of ints for comparison.

    The ints are token indices.
    This is useful for checking consistency of predictions.

    doc: an object whose ``spans`` attribute is a mapping from a key to an
        iterable of spans, each span exposing ``start`` and ``end``.
    RETURNS: a list with one inner list per entry of ``doc.spans`` (in the
        mapping's iteration order); each inner list holds a ``(start, end)``
        tuple per span. Keys are dropped.
    """
    # The keys are irrelevant to the comparison, so walk the values only.
    return [
        [(span.start, span.end) for span in cluster]
        for cluster in doc.spans.values()
    ]
@pytest.fixture