Feedback from code review

This commit is contained in:
Paul O'Leary McCann 2022-07-06 14:03:09 +09:00
parent 63e27b5e44
commit 8f598d7b01
2 changed files with 2 additions and 2 deletions

View File

@@ -205,7 +205,7 @@ def create_gold_scores(
return out return out
def _spans_to_offsets(doc): def _spans_to_offsets(doc: Doc) -> List[List[Tuple[int, int]]]:
"""Convert doc.spans to nested list of ints for comparison. """Convert doc.spans to nested list of ints for comparison.
The ints are character indices, and the spans groups are sorted by key first. The ints are character indices, and the spans groups are sorted by key first.

View File

@@ -154,7 +154,7 @@ def test_tokenization_mismatch(nlp):
for key, cluster in ref.spans.items(): for key, cluster in ref.spans.items():
char_spans[key] = [] char_spans[key] = []
for span in cluster: for span in cluster:
char_spans[key].append((span[0].idx, span[-1].idx + len(span[-1]))) char_spans[key].append((span.start_char, span.end_char))
with ref.retokenize() as retokenizer: with ref.retokenize() as retokenizer:
# merge "picked up" # merge "picked up"
retokenizer.merge(ref[2:4]) retokenizer.merge(ref[2:4])