Clean up unused functions

`make_clean_doc` is not needed and was removed.

`logsumexp` may be needed if I misunderstood the loss calculation, so I
left it in for now with a note.
This commit is contained in:
Paul O'Leary McCann 2021-05-28 15:56:20 +09:00
parent 0aa1083ce8
commit 4a4ef72191

View File

@ -44,11 +44,13 @@ def topk(xp, arr, k, axis=None):
def logsumexp(xp, arr, axis=None):
    """Emulate torch.logsumexp: the log of summed exponentials along `axis`.

    xp: array module providing numpy-style `exp` and `log` (the code was
        adapted from a cupy example, so numpy / cupy are the expected values).
    arr: array to reduce.
    axis: axis (or axes) to reduce over. None reduces over every element.
    RETURNS: `arr` with the reduced axis removed, e.g. a 2d array reduced
        with axis=1 becomes 1d.

    TODO: currently not used?
    """
    # from slide 5 here:
    # https://www.slideshare.net/ryokuta/cupy
    # Note: this was added to reproduce loss calculation in coref-hoi. If loss
    # can be calculated using another method this is not necessary.

    # Subtract the maximum before exponentiating so large inputs don't
    # overflow. keepdims=True keeps the reduced axis as size 1, so the
    # subtraction broadcasts correctly for whichever axis was requested
    # (a hard-coded expand_dims(hi, 1) only works for axis=1 on 2d input).
    hi = arr.max(axis=axis, keepdims=True)
    summed = xp.log(xp.exp(arr - hi).sum(axis=axis))
    # Drop only the axis that was reduced so the shapes line up with `summed`.
    return hi.squeeze(axis=axis) + summed
@ -215,17 +217,6 @@ def get_clusters_from_doc(doc) -> List[List[Tuple[int, int]]]:
return out
def make_clean_doc(nlp, doc):
    """Build a new Doc holding only the token texts and sentence starts of `doc`.

    The result is constructed from `nlp.vocab`; nothing else from `doc`
    (such as span annotations) is passed to the new Doc.
    """
    # Surely there is a better way to do this?
    # TODO: currently not used?
    token_texts = []
    sentence_flags = []
    for token in doc:
        token_texts.append(token.text)
        sentence_flags.append(token.is_sent_start)
    return Doc(nlp.vocab, words=token_texts, sent_starts=sentence_flags)
def create_gold_scores(
ments: Ints2d, clusters: List[List[Tuple[int, int]]]
) -> List[List[bool]]: