mirror of
https://github.com/explosion/spaCy.git
synced 2025-02-06 14:40:34 +03:00
Suggest approach for GoldParse
This commit is contained in:
parent
b078b05ecd
commit
5564314d32
|
@ -76,10 +76,15 @@ cdef class NewExample:
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
def to_dict(self):
|
def to_dict(self):
|
||||||
""" Note that this method does NOT export the doc, only the annotations ! """
|
# We should probably implement this? We could return the
|
||||||
token_dict = self._token_annotation
|
# doc_annotation and token_annotation, and this would allow us to
|
||||||
doc_dict = self._doc_annotation
|
# easily implement the `get_parses_from_example` in
|
||||||
return {"token_annotation": token_dict, "doc_annotation": doc_dict}
|
# spacy.syntax.gold_parse
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
|
def split_sents(self):
|
||||||
|
# Unclear whether we should really implement this. I guess?
|
||||||
|
raise NotImplementedError
|
||||||
|
|
||||||
def text(self):
|
def text(self):
|
||||||
return self.x.text
|
return self.x.text
|
||||||
|
|
|
@ -25,54 +25,34 @@ def is_punct_label(label):
|
||||||
|
|
||||||
|
|
||||||
def get_parses_from_example(
|
def get_parses_from_example(
|
||||||
eg, merge=True, vocab=None, make_projective=True, ignore_misaligned=False
|
example, merge=True, vocab=None, make_projective=True, ignore_misaligned=False
|
||||||
):
|
):
|
||||||
"""Return a list of (doc, GoldParse) objects.
|
"""Return a list of (doc, GoldParse) objects.
|
||||||
If merge is set to True, keep all Token annotations as one big list."""
|
If merge is set to True, keep all Token annotations as one big list."""
|
||||||
d = eg.doc_annotation
|
|
||||||
# merge == do not modify Example
|
# merge == do not modify Example
|
||||||
if merge:
|
if merge:
|
||||||
t = eg.token_annotation
|
examples = [example]
|
||||||
doc = eg.doc
|
|
||||||
if doc is None or not isinstance(doc, Doc):
|
|
||||||
if not vocab:
|
|
||||||
raise ValueError(Errors.E998)
|
|
||||||
doc = Doc(vocab, words=t.words)
|
|
||||||
try:
|
|
||||||
gp = GoldParse.from_annotation(
|
|
||||||
doc, d, t, make_projective=make_projective
|
|
||||||
)
|
|
||||||
except AlignmentError:
|
|
||||||
if ignore_misaligned:
|
|
||||||
gp = None
|
|
||||||
else:
|
else:
|
||||||
raise
|
|
||||||
return [(doc, gp)]
|
|
||||||
# not merging: one GoldParse per sentence, defining docs with the words
|
# not merging: one GoldParse per sentence, defining docs with the words
|
||||||
# from each sentence
|
# from each sentence
|
||||||
else:
|
examples = eg.split_sents()
|
||||||
parses = []
|
outputs = []
|
||||||
split_examples = eg.split_sents()
|
for eg in examples:
|
||||||
for split_example in split_examples:
|
eg_dict = eg.to_dict()
|
||||||
if not vocab:
|
|
||||||
raise ValueError(Errors.E998)
|
|
||||||
split_doc = Doc(vocab, words=split_example.token_annotation.words)
|
|
||||||
try:
|
try:
|
||||||
gp = GoldParse.from_annotation(
|
gp = GoldParse.from_annotation(
|
||||||
split_doc,
|
eg.predicted,
|
||||||
d,
|
eg_dict["doc_annotation"],
|
||||||
split_example.token_annotation,
|
eg_dict["token_annotation"],
|
||||||
make_projective=make_projective,
|
make_projective=make_projective
|
||||||
)
|
)
|
||||||
except AlignmentError:
|
except AlignmentError:
|
||||||
if ignore_misaligned:
|
if ignore_misaligned:
|
||||||
gp = None
|
gp = None
|
||||||
else:
|
else:
|
||||||
raise
|
raise
|
||||||
if gp is not None:
|
outputs.append((eg.predicted, gp))
|
||||||
parses.append((split_doc, gp))
|
return outputs
|
||||||
return parses
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
cdef class GoldParse:
|
cdef class GoldParse:
|
||||||
|
|
Loading…
Reference in New Issue
Block a user