mirror of
https://github.com/explosion/spaCy.git
synced 2025-10-21 11:14:32 +03:00
This is necessary because one of the three old methods relied on scipy for some complex problem solving. LEA is generally better for evaluations. The downside is that this means evaluations aren't comparable with many papers, but canonical scoring can be supported using external eval scripts or other methods.
637 lines
22 KiB
Python
637 lines
22 KiB
Python
from typing import Iterable, Tuple, Optional, Dict, Callable, Any, List
|
|
import warnings
|
|
|
|
from thinc.types import Floats2d, Floats3d, Ints2d
|
|
from thinc.api import Model, Config, Optimizer, CategoricalCrossentropy
|
|
from thinc.api import set_dropout_rate, to_categorical
|
|
from itertools import islice
|
|
from statistics import mean
|
|
|
|
from .trainable_pipe import TrainablePipe
|
|
from ..language import Language
|
|
from ..training import Example, validate_examples, validate_get_examples
|
|
from ..errors import Errors
|
|
from ..scorer import Scorer
|
|
from ..tokens import Doc
|
|
from ..vocab import Vocab
|
|
|
|
from ..ml.models.coref_util import (
|
|
create_gold_scores,
|
|
MentionClusters,
|
|
create_head_span_idxs,
|
|
get_clusters_from_doc,
|
|
get_predicted_clusters,
|
|
DEFAULT_CLUSTER_PREFIX,
|
|
doc2clusters,
|
|
)
|
|
|
|
from ..coref_scorer import Evaluator, get_cluster_info, lea
|
|
|
|
|
|
# Default model settings for the "coref" factory, written in config-file
# syntax. The text is parsed below and its [model] section becomes the
# factory's default "model" entry.
default_config = """
[model]
@architectures = "spacy.Coref.v1"
embedding_size = 20
hidden_size = 1024
n_hidden_layers = 1
dropout = 0.3
rough_k = 50
a_scoring_batch_size = 512
sp_embedding_size = 64

[model.tok2vec]
@architectures = "spacy.Tok2Vec.v2"

[model.tok2vec.embed]
@architectures = "spacy.MultiHashEmbed.v1"
width = 64
rows = [2000, 2000, 1000, 1000, 1000, 1000]
attrs = ["ORTH", "LOWER", "PREFIX", "SUFFIX", "SHAPE", "ID"]
include_static_vectors = false

[model.tok2vec.encode]
@architectures = "spacy.MaxoutWindowEncoder.v2"
width = ${model.tok2vec.embed.width}
window_size = 1
maxout_pieces = 3
depth = 2
"""
# Parse once at import time and keep only the [model] section.
_parsed_coref_config = Config().from_str(default_config)
DEFAULT_MODEL = _parsed_coref_config["model"]

# NOTE(review): nothing in the visible code reads this constant; the "coref"
# factory uses DEFAULT_CLUSTER_PREFIX imported from coref_util instead.
# Confirm whether both are needed.
DEFAULT_CLUSTERS_PREFIX = "coref_clusters"
|
|
|
|
|
|
@Language.factory(
    "coref",
    requires=["doc.spans"],
    assigns=["doc.spans"],
    default_score_weights={"coref_f": 1.0, "coref_p": None, "coref_r": None},
    default_config={
        "model": DEFAULT_MODEL,
        "span_cluster_prefix": DEFAULT_CLUSTER_PREFIX,
    },
)
def make_coref(
    nlp: Language,
    name: str,
    model,
    span_cluster_prefix: str = "coref",
) -> "CoreferenceResolver":
    """Build the CoreferenceResolver registered as the "coref" factory.

    nlp (Language): The pipeline the component is added to; only its vocab
        is used here.
    name (str): The component instance name.
    model: The model powering the component.
    span_cluster_prefix (str): Prefix for the doc.spans keys the predicted
        clusters are stored under.
    RETURNS (CoreferenceResolver): The newly constructed component.
    """
    component = CoreferenceResolver(
        nlp.vocab,
        model,
        name,
        span_cluster_prefix=span_cluster_prefix,
    )
    return component
|
|
|
|
|
|
|
|
class CoreferenceResolver(TrainablePipe):
    """Pipeline component for coreference resolution.

    Predicted clusters are written to doc.spans, one span group per cluster,
    under keys of the form "{span_cluster_prefix}_{i}".

    DOCS: https://spacy.io/api/coref (TODO)
    """

    def __init__(
        self,
        vocab: Vocab,
        model: Model,
        name: str = "coref",
        *,
        span_mentions: str = "coref_mentions",
        span_cluster_prefix: str,
    ) -> None:
        """Initialize a coreference resolution component.

        vocab (Vocab): The shared vocabulary.
        model (thinc.api.Model): The Thinc Model powering the pipeline component.
        name (str): The component instance name, used to add entries to the
            losses during training.
        span_mentions (str): Key in doc.spans where the candidate coref mentions
            are stored in.
        span_cluster_prefix (str): Prefix for the key in doc.spans to store the
            coref clusters in.

        DOCS: https://spacy.io/api/coref#init (TODO)
        """
        self.vocab = vocab
        self.model = model
        self.name = name
        self.span_mentions = span_mentions
        self.span_cluster_prefix = span_cluster_prefix
        self._rehearsal_model = None

        self.cfg = {}

    def predict(self, docs: Iterable[Doc]) -> List[MentionClusters]:
        """Apply the pipeline's model to a batch of docs, without modifying them.

        docs (Iterable[Doc]): The documents to predict.
        RETURNS: The models prediction for each document.

        DOCS: https://spacy.io/api/coref#predict (TODO)
        """
        xp = self.model.ops.xp
        out = []
        # TODO batching: the model is currently run on one doc at a time.
        for doc in docs:
            # idxs is a list of mentions (start / end idxs)
            # each item in scores includes scores and a mapping from scores to mentions
            scores, ant_idxs = self.model.predict([doc])

            # Every token is treated as a candidate mention one token wide.
            starts = xp.arange(0, len(doc))
            ends = starts + 1

            predicted = get_predicted_clusters(xp, starts, ends, ant_idxs, scores)
            out.append(predicted)

        return out

    def set_annotations(self, docs: Iterable[Doc], clusters_by_doc) -> None:
        """Modify a batch of Doc objects, using pre-computed scores.

        docs (Iterable[Doc]): The documents to modify.
        clusters_by_doc: The span clusters, produced by
            CoreferenceResolver.predict, one entry per doc.
        RAISES (ValueError): If the number of docs and cluster lists differ,
            or if a doc already has a span group under one of the keys this
            component would write to.

        DOCS: https://spacy.io/api/coref#set_annotations (TODO)
        """
        if len(docs) != len(clusters_by_doc):
            raise ValueError(
                "Found coref clusters incompatible with the "
                "documents provided to the 'coref' component. "
                "This is likely a bug in spaCy."
            )
        for doc, clusters in zip(docs, clusters_by_doc):
            for ii, cluster in enumerate(clusters):
                key = self.span_cluster_prefix + "_" + str(ii)
                if key in doc.spans:
                    # Existing annotations are never overwritten. Name the
                    # offending key so the collision can be diagnosed.
                    raise ValueError(
                        f"Span group '{key}' already exists in doc.spans, so "
                        f"the '{self.name}' component cannot store its "
                        "clusters there. Remove the existing coref "
                        "annotations or use a different 'span_cluster_prefix'."
                    )

                doc.spans[key] = []
                for mention in cluster:
                    doc.spans[key].append(doc[mention[0] : mention[1]])

    def update(
        self,
        examples: Iterable[Example],
        *,
        drop: float = 0.0,
        sgd: Optional[Optimizer] = None,
        losses: Optional[Dict[str, float]] = None,
    ) -> Dict[str, float]:
        """Learn from a batch of documents and gold-standard information,
        updating the pipe's model. Runs the model on each example separately
        and delegates to get_loss.

        examples (Iterable[Example]): A batch of Example objects.
        drop (float): The dropout rate.
        sgd (thinc.api.Optimizer): The optimizer.
        losses (Dict[str, float]): Optional record of the loss during training.
            Updated using the component name as the key.
        RETURNS (Dict[str, float]): The updated losses dictionary.

        DOCS: https://spacy.io/api/coref#update (TODO)
        """
        if losses is None:
            losses = {}
        losses.setdefault(self.name, 0.0)
        validate_examples(examples, "CoreferenceResolver.update")
        if not any(len(eg.predicted) if eg.predicted else 0 for eg in examples):
            # Handle cases where there are no tokens in any docs.
            return losses
        set_dropout_rate(self.model, drop)

        total_loss = 0.0

        for eg in examples:
            # Docs are processed one at a time, so the batch-level guard above
            # does not protect against an empty doc in a mixed batch.
            if eg.predicted is None or len(eg.predicted) == 0:
                continue
            # TODO check this causes no issues (in practice it runs)
            preds, backprop = self.model.begin_update([eg.predicted])
            score_matrix, mention_idx = preds
            loss, d_scores = self.get_loss([eg], score_matrix, mention_idx)
            total_loss += loss
            # TODO check shape here
            backprop((d_scores, mention_idx))

        if sgd is not None:
            self.finish_update(sgd)
        losses[self.name] += total_loss
        return losses

    def rehearse(
        self,
        examples: Iterable[Example],
        *,
        drop: float = 0.0,
        sgd: Optional[Optimizer] = None,
        losses: Optional[Dict[str, float]] = None,
    ) -> Dict[str, float]:
        """Perform a "rehearsal" update from a batch of data. Rehearsal updates
        teach the current model to make predictions similar to an initial model,
        to try to address the "catastrophic forgetting" problem. This feature is
        experimental.

        examples (Iterable[Example]): A batch of Example objects.
        drop (float): The dropout rate.
        sgd (thinc.api.Optimizer): The optimizer.
        losses (Dict[str, float]): Optional record of the loss during training.
            Updated using the component name as the key.
        RETURNS (Dict[str, float]): The updated losses dictionary.

        DOCS: https://spacy.io/api/coref#rehearse (TODO)
        """
        # Always hand back a dict, as the return annotation promises and as
        # update() does, even when the caller did not pass one in.
        if losses is None:
            losses = {}
        losses.setdefault(self.name, 0.0)
        if self._rehearsal_model is None:
            return losses
        validate_examples(examples, "CoreferenceResolver.rehearse")
        # TODO test this whole function
        docs = [eg.predicted for eg in examples]
        if not any(len(doc) for doc in docs):
            # Handle cases where there are no tokens in any docs.
            return losses
        set_dropout_rate(self.model, drop)
        scores, bp_scores = self.model.begin_update(docs)
        # TODO below
        # NOTE(review): update() unpacks this model's output as a
        # (score_matrix, mention_idx) pair and runs one doc at a time, so the
        # subtraction below probably needs the same unpacking. The rehearsal
        # model is also called with Examples rather than docs. Untested.
        target = self._rehearsal_model(examples)
        gradient = scores - target
        bp_scores(gradient)
        if sgd is not None:
            self.finish_update(sgd)
        losses[self.name] += float((gradient**2).sum())
        return losses

    def add_label(self, label: str) -> int:
        """Technically this method should be implemented from TrainablePipe,
        but it is not relevant for the coref component.
        """
        raise NotImplementedError(
            Errors.E931.format(
                parent="CoreferenceResolver", method="add_label", name=self.name
            )
        )

    def get_loss(
        self,
        examples: Iterable[Example],
        score_matrix: Floats2d,
        mention_idx: Ints2d,
    ) -> Tuple[float, Floats2d]:
        """Find the loss and gradient of loss for a single document and its
        predicted scores.

        examples (Iterable[Examples]): A batch holding exactly one example.
        score_matrix: Antecedent scores predicted by the model for the
            example; its first column is the "no antecedent" placeholder.
        mention_idx: For each mention, the indices of the candidate
            antecedents the remaining score columns refer to.
        RETURNS (Tuple[float, Floats2d]): The loss and the gradient with
            respect to score_matrix.
        RAISES (ValueError): If more than one example is passed.

        DOCS: https://spacy.io/api/coref#get_loss (TODO)
        """
        ops = self.model.ops
        xp = ops.xp

        # The scores describe a single doc, so anything other than exactly one
        # example cannot be matched up with them.
        # TODO rework this to take multiple examples.
        if len(examples) != 1:
            raise ValueError(
                "CoreferenceResolver.get_loss expects exactly one example "
                f"per call, but got {len(examples)}."
            )
        example = examples[0]

        clusters = get_clusters_from_doc(example.reference)
        span_idxs = create_head_span_idxs(ops, len(example.predicted))
        gscores = create_gold_scores(span_idxs, clusters)
        gscores = ops.asarray2f(gscores)
        # Keep only the gold scores of the candidates the model scored.
        top_gscores = xp.take_along_axis(gscores, mention_idx, axis=1)
        # now add the placeholder: it is gold exactly when no candidate is
        gold_placeholder = ~top_gscores.any(axis=1).T
        gold_placeholder = xp.expand_dims(gold_placeholder, 1)
        top_gscores = xp.concatenate((gold_placeholder, top_gscores), 1)

        # boolean to float
        top_gscores = ops.asarray2f(top_gscores)

        with warnings.catch_warnings():
            # log(0) for non-gold entries is -inf and emits a RuntimeWarning;
            # the -inf is intended, as it masks those entries in the softmax.
            warnings.filterwarnings("ignore", category=RuntimeWarning)
            gold_probs = ops.softmax(score_matrix + xp.log(top_gscores), axis=1)
        pred_probs = ops.softmax(score_matrix, axis=1)
        grad = pred_probs - gold_probs
        loss = float((grad**2).sum())

        return loss, grad

    def initialize(
        self,
        get_examples: Callable[[], Iterable[Example]],
        *,
        nlp: Optional[Language] = None,
    ) -> None:
        """Initialize the pipe for training, using a representative set
        of data examples.

        get_examples (Callable[[], Iterable[Example]]): Function that
            returns a representative sample of gold-standard Example objects.
        nlp (Language): The current nlp object the component is part of.

        DOCS: https://spacy.io/api/coref#initialize (TODO)
        """
        validate_get_examples(get_examples, "CoreferenceResolver.initialize")

        X = []
        Y = []
        # Only the first two examples are used to initialize the model.
        for ex in islice(get_examples(), 2):
            X.append(ex.predicted)
            Y.append(ex.reference)

        assert len(X) > 0, Errors.E923.format(name=self.name)
        self.model.initialize(X=X, Y=Y)

    def score(self, examples, **kwargs):
        """Score a batch of examples using LEA.

        For details on how LEA works and why to use it see the paper:

        Which Coreference Evaluation Metric Do You Trust? A Proposal for a Link-based Entity Aware Metric
        Moosavi and Strube, 2016
        https://api.semanticscholar.org/CorpusID:17606580

        examples: The examples to score; clusters are read from the span
            groups under span_cluster_prefix on both docs of each example.
        RETURNS (Dict[str, float]): The "coref_f", "coref_p" and "coref_r"
            scores.
        """

        evaluator = Evaluator(lea)

        for ex in examples:
            p_clusters = doc2clusters(ex.predicted, self.span_cluster_prefix)
            g_clusters = doc2clusters(ex.reference, self.span_cluster_prefix)
            cluster_info = get_cluster_info(p_clusters, g_clusters)
            evaluator.update(cluster_info)

        score = {
            "coref_f": evaluator.get_f1(),
            "coref_p": evaluator.get_precision(),
            "coref_r": evaluator.get_recall(),
        }
        return score
|
|
|
|
|
|
# Default model settings for the "span_predictor" factory, written in
# config-file syntax. The text is parsed below and its [model] section becomes
# the factory's default "model" entry.
default_span_predictor_config = """
[model]
@architectures = "spacy.SpanPredictor.v1"
hidden_size = 1024
dist_emb_size = 64

[model.tok2vec]
@architectures = "spacy.Tok2Vec.v2"

[model.tok2vec.embed]
@architectures = "spacy.MultiHashEmbed.v1"
width = 64
rows = [2000, 2000, 1000, 1000, 1000, 1000]
attrs = ["ORTH", "LOWER", "PREFIX", "SUFFIX", "SHAPE", "ID"]
include_static_vectors = false

[model.tok2vec.encode]
@architectures = "spacy.MaxoutWindowEncoder.v2"
width = ${model.tok2vec.embed.width}
window_size = 1
maxout_pieces = 3
depth = 2
"""
# Parse once at import time and keep only the [model] section.
_parsed_span_predictor_config = Config().from_str(default_span_predictor_config)
DEFAULT_SPAN_PREDICTOR_MODEL = _parsed_span_predictor_config["model"]
|
|
|
|
@Language.factory(
    "span_predictor",
    requires=["doc.spans"],
    assigns=["doc.spans"],
    default_score_weights={"span_accuracy": 1.0},
    default_config={
        "model": DEFAULT_SPAN_PREDICTOR_MODEL,
        "input_prefix": "coref_head_clusters",
        "output_prefix": "coref_clusters",
    },
)
def make_span_predictor(
    nlp: Language,
    name: str,
    model,
    input_prefix: str = "coref_head_clusters",
    output_prefix: str = "coref_clusters",
) -> "SpanPredictor":
    """Build the SpanPredictor registered as the "span_predictor" factory.

    nlp (Language): The pipeline the component is added to; only its vocab
        is used here.
    name (str): The component instance name.
    model: The model powering the component.
    input_prefix (str): Prefix of the doc.spans keys the input clusters are
        read from.
    output_prefix (str): Prefix of the doc.spans keys the resolved clusters
        are written to.
    RETURNS (SpanPredictor): The newly constructed component.
    """
    component = SpanPredictor(
        nlp.vocab,
        model,
        name,
        input_prefix=input_prefix,
        output_prefix=output_prefix,
    )
    return component
|
|
|
|
class SpanPredictor(TrainablePipe):
    """Pipeline component to resolve one-token spans to full spans.

    Used in coreference resolution. Input clusters are read from the span
    groups in doc.spans starting with input_prefix, and the resolved clusters
    are written under "{output_prefix}_{i}".
    """

    def __init__(
        self,
        vocab: Vocab,
        model: Model,
        name: str = "span_predictor",
        *,
        input_prefix: str = "coref_head_clusters",
        output_prefix: str = "coref_clusters",
    ) -> None:
        """Initialize a span predictor component.

        vocab (Vocab): The shared vocabulary.
        model (thinc.api.Model): The Thinc Model powering the pipeline component.
        name (str): The component instance name, used to add entries to the
            losses during training.
        input_prefix (str): Prefix of the doc.spans keys the input clusters
            are read from.
        output_prefix (str): Prefix of the doc.spans keys the resolved
            clusters are written to.
        """
        self.vocab = vocab
        self.model = model
        self.name = name
        self.input_prefix = input_prefix
        self.output_prefix = output_prefix

        self.cfg = {}

    def predict(self, docs: Iterable[Doc]) -> List[MentionClusters]:
        """Apply the pipeline's model to a batch of docs, without modifying them.

        docs (Iterable[Doc]): The documents to predict.
        RETURNS: For each doc, its input clusters with every mention replaced
            by a predicted (start, end) pair of token indices. The end index
            is inclusive: it is the argmax over per-token end scores, and
            get_loss trains those scores against `mention.end - 1`.
        """
        # for now pretend there's just one doc: the model is run per doc
        out = []
        for doc in docs:
            # TODO check shape here
            span_scores = self.model.predict([doc])
            if span_scores.size:
                # the information about clustering has to come from the input docs
                # first let's convert the scores to a list of span idxs
                start_scores = span_scores[:, :, 0]
                end_scores = span_scores[:, :, 1]
                starts = start_scores.argmax(axis=1)
                ends = end_scores.argmax(axis=1)

                # TODO check start < end

                # get the old clusters (shape will be preserved)
                clusters = doc2clusters(doc, self.input_prefix)
                # Score rows are consumed in the order the mentions are
                # visited while walking the clusters.
                cidx = 0
                out_clusters = []
                for cluster in clusters:
                    ncluster = []
                    for _ in cluster:
                        ncluster.append((starts[cidx], ends[cidx]))
                        cidx += 1
                    out_clusters.append(ncluster)
            else:
                out_clusters = []
            out.append(out_clusters)
        return out

    def set_annotations(self, docs: Iterable[Doc], clusters_by_doc) -> None:
        """Modify a batch of Doc objects, using pre-computed clusters.

        docs (Iterable[Doc]): The documents to modify.
        clusters_by_doc: The clusters produced by SpanPredictor.predict, one
            entry per doc.
        """
        for doc, clusters in zip(docs, clusters_by_doc):
            for ii, cluster in enumerate(clusters):
                # The predicted end index is inclusive (the model is trained
                # on `mention.end - 1`), while Doc slicing excludes the end,
                # hence the + 1. Without it every span loses its last token
                # and single-token mentions become empty. int() turns array
                # scalars into plain slice bounds.
                spans = [doc[int(mm[0]) : int(mm[1]) + 1] for mm in cluster]
                doc.spans[f"{self.output_prefix}_{ii}"] = spans

    def update(
        self,
        examples: Iterable[Example],
        *,
        drop: float = 0.0,
        sgd: Optional[Optimizer] = None,
        losses: Optional[Dict[str, float]] = None,
    ) -> Dict[str, float]:
        """Learn from a batch of documents and gold-standard information,
        updating the pipe's model. Runs the model on each example separately
        and delegates to get_loss.

        examples (Iterable[Example]): A batch of Example objects.
        drop (float): The dropout rate.
        sgd (thinc.api.Optimizer): The optimizer.
        losses (Dict[str, float]): Optional record of the loss during training.
            Updated using the component name as the key.
        RETURNS (Dict[str, float]): The updated losses dictionary.
        """
        if losses is None:
            losses = {}
        losses.setdefault(self.name, 0.0)
        validate_examples(examples, "SpanPredictor.update")
        if not any(len(eg.reference) if eg.reference else 0 for eg in examples):
            # Handle cases where there are no tokens in any docs.
            return losses
        set_dropout_rate(self.model, drop)

        total_loss = 0
        for eg in examples:
            span_scores, backprop = self.model.begin_update([eg.predicted])
            # FIXME, this only happens once in the first 1000 docs of OntoNotes
            # and I'm not sure yet why.
            if span_scores.size:
                loss, d_scores = self.get_loss([eg], span_scores)
                total_loss += loss
                # TODO check shape here
                backprop(d_scores)

        if sgd is not None:
            self.finish_update(sgd)
        losses[self.name] += total_loss
        return losses

    def rehearse(
        self,
        examples: Iterable[Example],
        *,
        drop: float = 0.0,
        sgd: Optional[Optimizer] = None,
        losses: Optional[Dict[str, float]] = None,
    ) -> Dict[str, float]:
        """Rehearsal is not supported by this component yet.

        RAISES (NotImplementedError): Always.
        """
        # TODO this should be added later
        raise NotImplementedError(
            Errors.E931.format(
                parent="SpanPredictor", method="rehearse", name=self.name
            )
        )

    def add_label(self, label: str) -> int:
        """Technically this method should be implemented from TrainablePipe,
        but it is not relevant for this component.
        """
        raise NotImplementedError(
            Errors.E931.format(
                parent="SpanPredictor", method="add_label", name=self.name
            )
        )

    def get_loss(
        self,
        examples: Iterable[Example],
        span_scores: Floats3d,
    ):
        """Find the loss and gradient of loss for a single document and its
        predicted span scores.

        examples (Iterable[Example]): A batch holding exactly one example.
        span_scores (Floats3d): The scores predicted for that example.
        RETURNS: The loss as a float, and the gradient with respect to
            span_scores.
        """
        ops = self.model.ops
        xp = ops.xp

        # NOTE This is doing fake batching, and should always get a list of one example
        assert len(examples) == 1, "Only fake batching is supported."
        # starts and ends are gold starts and ends (Ints1d)
        # span_scores is a Floats3d. What are the axes? mention x token x start/end
        for eg in examples:
            starts = []
            ends = []
            for key, sg in eg.reference.spans.items():
                if key.startswith(self.output_prefix):
                    for mention in sg:
                        starts.append(mention.start)
                        # The end target is the index of the mention's last
                        # token, so it is one less than the exclusive
                        # `mention.end`. Predictions are therefore inclusive
                        # too, and set_annotations must add 1 back.
                        ends.append(mention.end - 1)

        starts = xp.asarray(starts)
        ends = xp.asarray(ends)
        start_scores = span_scores[:, :, 0]
        end_scores = span_scores[:, :, 1]
        n_classes = start_scores.shape[1]
        start_probs = ops.softmax(start_scores, axis=1)
        end_probs = ops.softmax(end_scores, axis=1)
        start_targets = to_categorical(starts, n_classes)
        end_targets = to_categorical(ends, n_classes)
        start_grads = start_probs - start_targets
        end_grads = end_probs - end_targets
        # Re-assemble the gradient in the same layout as span_scores.
        grads = xp.stack((start_grads, end_grads), axis=2)
        loss = float((grads**2).sum())
        return loss, grads

    def initialize(
        self,
        get_examples: Callable[[], Iterable[Example]],
        *,
        nlp: Optional[Language] = None,
    ) -> None:
        """Initialize the pipe for training, using a representative set
        of data examples.

        get_examples (Callable[[], Iterable[Example]]): Function that
            returns a representative sample of gold-standard Example objects.
        nlp (Language): The current nlp object the component is part of.
        """
        validate_get_examples(get_examples, "SpanPredictor.initialize")

        X = []
        Y = []
        # Only the first two examples are used to initialize the model.
        for ex in islice(get_examples(), 2):

            if not ex.predicted.spans:
                # set placeholder for shape inference
                doc = ex.predicted
                # The placeholder below needs exactly two tokens, which is
                # also what the message states.
                assert len(doc) >= 2, "Coreference requires at least two tokens"
                doc.spans[f"{self.input_prefix}_0"] = [doc[0:1], doc[1:2]]
            X.append(ex.predicted)
            Y.append(ex.reference)

        assert len(X) > 0, Errors.E923.format(name=self.name)
        self.model.initialize(X=X, Y=Y)

    def score(self, examples, **kwargs):
        """
        Evaluate on reconstructing the correct spans around
        gold heads.

        examples: The examples to score. Gold and predicted mentions are
            paired positionally within span groups sharing the same key.
        RETURNS (Dict[str, float]): "span_accuracy", the per-example accuracy
            averaged over all examples that have at least one paired
            mention, or 0.0 if there are none.
        """
        scores = []
        for eg in examples:
            ref = eg.reference
            pred = eg.predicted
            n_mentions = 0
            n_correct = 0
            for key, gold_sg in ref.spans.items():
                if not key.startswith(self.output_prefix):
                    continue
                # NOTE(review): raises KeyError when the predicted doc has no
                # span group under this key; zip also drops unpaired mentions
                # when the groups differ in length. Confirm that is intended.
                pred_sg = pred.spans[key]
                for gold_mention, pred_mention in zip(gold_sg, pred_sg):
                    n_mentions += 1
                    same_start = gold_mention.start == pred_mention.start
                    same_end = gold_mention.end == pred_mention.end
                    if same_start and same_end:
                        n_correct += 1
            if n_mentions == 0:
                # Nothing to evaluate here; averaging an empty comparison
                # would put a NaN into the final mean.
                continue
            scores.append(n_correct / n_mentions)
        # statistics.mean raises on an empty sequence.
        return {"span_accuracy": mean(scores) if scores else 0.0}
|