Mirror of https://github.com/explosion/spaCy.git

Remove unused functions

parent d0ae2590db
commit 5650853c0f
@@ -56,21 +56,6 @@ def doc2clusters(doc: Doc, prefix=DEFAULT_CLUSTER_PREFIX) -> MentionClusters:
     return out
 
 
-def topk(xp, arr, k, axis=1):
-    """Given an array and a k value, give the top values and idxs for each row."""
-
-    part = xp.argpartition(arr, -k, axis=axis)
-    idxs = xp.flip(part)[:, :k]
-
-    vals = xp.take_along_axis(arr, idxs, axis=axis)
-
-    sidxs = xp.argsort(-vals, axis=axis)
-    # map these idxs back to the original
-    oidxs = xp.take_along_axis(idxs, sidxs, axis=axis)
-    svals = xp.take_along_axis(vals, sidxs, axis=axis)
-    return svals, oidxs
-
-
 # from model.py, refactored to be non-member
 def get_predicted_antecedents(xp, antecedent_idx, antecedent_scores):
     """Get the ID of the antecedent for each span. -1 if no antecedent."""
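For reference, the removed topk amounts to a row-wise top-k via argpartition. A minimal standalone sketch in plain NumPy (hypothetical topk_rows helper; note that np.flip without an axis argument, as in the removed xp.flip(part), reverses every axis of a 2-D array, so the sketch passes axis=1 explicitly):

    import numpy as np

    def topk_rows(arr, k):
        """Return (values, indexes) of the k largest entries per row, sorted descending."""
        # argpartition leaves the k largest entries of each row in the last k columns (unordered)
        part = np.argpartition(arr, -k, axis=1)
        # flip along axis 1 only, then keep the first k columns
        idxs = np.flip(part, axis=1)[:, :k]
        vals = np.take_along_axis(arr, idxs, axis=1)
        # sort the k candidates descending and map back to the original column indexes
        order = np.argsort(-vals, axis=1)
        return np.take_along_axis(vals, order, axis=1), np.take_along_axis(idxs, order, axis=1)

    arr = np.array([[3.0, 1.0, 4.0, 1.5], [2.0, 7.0, 0.5, 6.0]])
    vals, idxs = topk_rows(arr, 2)
    # vals -> [[4.0, 3.0], [7.0, 6.0]], idxs -> [[2, 0], [1, 3]]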
@@ -124,55 +109,6 @@ def get_predicted_clusters(
     return predicted_clusters
 
 
-def get_sentence_map(doc: Doc):
-    """For the given span, return a list of sentence indexes."""
-    if doc.has_annotation("SENT_START"):
-        si = 0
-        out = []
-        for sent in doc.sents:
-            for _ in sent:
-                out.append(si)
-            si += 1
-        return out
-    else:
-        # If there are no sents then just return dummy values.
-        # Shouldn't happen in general training, but typical in init.
-        return [0] * len(doc)
-
-
-def get_candidate_mentions(
-    doc: Doc, max_span_width: int = 20
-) -> Tuple[List[int], List[int]]:
-    """Given a Doc, return candidate mentions.
-
-    This isn't a trainable layer, it just returns raw candidates.
-    """
-    # XXX Note that in coref-hoi the indexes are designed so you actually want [i:j+1], but here
-    # we're using [i:j], which is more natural.
-
-    sentence_map = get_sentence_map(doc)
-
-    begins = []
-    ends = []
-    for tok in doc:
-        si = sentence_map[tok.i]  # sentence index
-        for ii in range(1, max_span_width):
-            ei = tok.i + ii  # end index
-
-            # Note: this matches slice syntax, so the token index is one less
-            if ei > len(doc) or sentence_map[ei - 1] != si:
-                break
-
-            begins.append(tok.i)
-            ends.append(ei)
-
-    return (begins, ends)
-
-
-@registry.misc("spacy.CorefCandidateGenerator.v1")
-def create_mention_generator() -> Any:
-    return get_candidate_mentions
-
-
 def select_non_crossing_spans(
     idxs: List[int], starts: List[int], ends: List[int], limit: int
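For context, a rough usage sketch of the removed helpers (hypothetical example; assumes a pipeline that sets sentence boundaries, such as en_core_web_sm, and the removed functions still in scope):

    import spacy

    nlp = spacy.load("en_core_web_sm")  # any pipeline that sets SENT_START
    doc = nlp("The cat sat. It purred.")

    # get_sentence_map: one sentence index per token
    print(get_sentence_map(doc))  # [0, 0, 0, 0, 1, 1, 1]

    # get_candidate_mentions: parallel begin/end slice indexes for every span of
    # width 1 to max_span_width - 1 (the upper bound is exclusive as written)
    # that stays within a single sentence
    begins, ends = get_candidate_mentions(doc, max_span_width=3)
    candidates = [doc[b:e] for b, e in zip(begins, ends)]
    # e.g. doc[0:2] is "The cat"; no candidate crosses the sentence boundary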
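The @registry.misc decorator registered the generator under a string name so that configs could reference it; before this commit the entry could be resolved like so (a sketch using spaCy's registry API):

    from spacy.util import registry

    # returns get_candidate_mentions while the entry is registered
    make_candidates = registry.misc.get("spacy.CorefCandidateGenerator.v1")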