mirror of
https://github.com/explosion/spaCy.git
synced 2025-02-06 14:40:34 +03:00
Suggest approach for GoldParse
This commit is contained in:
parent
b078b05ecd
commit
5564314d32
|
@ -76,10 +76,15 @@ cdef class NewExample:
|
|||
raise NotImplementedError
|
||||
|
||||
def to_dict(self):
|
||||
""" Note that this method does NOT export the doc, only the annotations ! """
|
||||
token_dict = self._token_annotation
|
||||
doc_dict = self._doc_annotation
|
||||
return {"token_annotation": token_dict, "doc_annotation": doc_dict}
|
||||
# We should probably implement this? We could return the
|
||||
# doc_annotation and token_annotation, and this would allow us to
|
||||
# easily implement the `get_parses_from_example` in
|
||||
# spacy.syntax.gold_parse
|
||||
raise NotImplementedError
|
||||
|
||||
def split_sents(self):
|
||||
# Unclear whether we should really implement this. I guess?
|
||||
raise NotImplementedError
|
||||
|
||||
def text(self):
|
||||
return self.x.text
|
||||
|
|
|
@ -25,54 +25,34 @@ def is_punct_label(label):
|
|||
|
||||
|
||||
def get_parses_from_example(
|
||||
eg, merge=True, vocab=None, make_projective=True, ignore_misaligned=False
|
||||
example, merge=True, vocab=None, make_projective=True, ignore_misaligned=False
|
||||
):
|
||||
"""Return a list of (doc, GoldParse) objects.
|
||||
If merge is set to True, keep all Token annotations as one big list."""
|
||||
d = eg.doc_annotation
|
||||
# merge == do not modify Example
|
||||
if merge:
|
||||
t = eg.token_annotation
|
||||
doc = eg.doc
|
||||
if doc is None or not isinstance(doc, Doc):
|
||||
if not vocab:
|
||||
raise ValueError(Errors.E998)
|
||||
doc = Doc(vocab, words=t.words)
|
||||
try:
|
||||
gp = GoldParse.from_annotation(
|
||||
doc, d, t, make_projective=make_projective
|
||||
)
|
||||
except AlignmentError:
|
||||
if ignore_misaligned:
|
||||
gp = None
|
||||
examples = [example]
|
||||
else:
|
||||
raise
|
||||
return [(doc, gp)]
|
||||
# not merging: one GoldParse per sentence, defining docs with the words
|
||||
# from each sentence
|
||||
else:
|
||||
parses = []
|
||||
split_examples = eg.split_sents()
|
||||
for split_example in split_examples:
|
||||
if not vocab:
|
||||
raise ValueError(Errors.E998)
|
||||
split_doc = Doc(vocab, words=split_example.token_annotation.words)
|
||||
examples = eg.split_sents()
|
||||
outputs = []
|
||||
for eg in examples:
|
||||
eg_dict = eg.to_dict()
|
||||
try:
|
||||
gp = GoldParse.from_annotation(
|
||||
split_doc,
|
||||
d,
|
||||
split_example.token_annotation,
|
||||
make_projective=make_projective,
|
||||
eg.predicted,
|
||||
eg_dict["doc_annotation"],
|
||||
eg_dict["token_annotation"],
|
||||
make_projective=make_projective
|
||||
)
|
||||
except AlignmentError:
|
||||
if ignore_misaligned:
|
||||
gp = None
|
||||
else:
|
||||
raise
|
||||
if gp is not None:
|
||||
parses.append((split_doc, gp))
|
||||
return parses
|
||||
|
||||
outputs.append((eg.predicted, gp))
|
||||
return outputs
|
||||
|
||||
|
||||
cdef class GoldParse:
|
||||
|
|
Loading…
Reference in New Issue
Block a user