Suggest approach for GoldParse

2025-07-01 02:13:07 +03:00 · 2020-06-13 15:43:35 +02:00 · 2020-06-13 15:43:35 +02:00 · 5564314d32
commit 5564314d32
parent b078b05ecd
2 changed files with 24 additions and 39 deletions
--- a/spacy/gold/new_example.pyx
+++ b/spacy/gold/new_example.pyx
@@ -76,10 +76,15 @@ cdef class NewExample:
        raise NotImplementedError
    def to_dict(self):
-        """ Note that this method does NOT export the doc, only the annotations ! """
+        # We should probably implement this? We could return the 
-        token_dict = self._token_annotation
+        # doc_annotation and token_annotation, and this would allow us to
-        doc_dict = self._doc_annotation
+        # easily implement the `get_parses_from_example` in
-        return {"token_annotation": token_dict, "doc_annotation": doc_dict}
+        # spacy.syntax.gold_parse
        raise NotImplementedError
    def split_sents(self):
        # Unclear whether we should really implement this. I guess?
        raise NotImplementedError
    def text(self):
        return self.x.text
--- a/spacy/syntax/gold_parse.pyx
+++ b/spacy/syntax/gold_parse.pyx
@@ -25,54 +25,34 @@ def is_punct_label(label):
 def get_parses_from_example(
-    eg, merge=True, vocab=None, make_projective=True, ignore_misaligned=False
+    example, merge=True, vocab=None, make_projective=True, ignore_misaligned=False
 ):
    """Return a list of (doc, GoldParse) objects.
    If merge is set to True, keep all Token annotations as one big list."""
    d = eg.doc_annotation
    # merge == do not modify Example
    if merge:
-        t = eg.token_annotation
+        examples = [example]
        doc = eg.doc
        if doc is None or not isinstance(doc, Doc):
            if not vocab:
                raise ValueError(Errors.E998)
            doc = Doc(vocab, words=t.words)
        try:
            gp = GoldParse.from_annotation(
                doc, d, t, make_projective=make_projective
            )
        except AlignmentError:
            if ignore_misaligned:
                gp = None
    else:
                raise
        return [(doc, gp)]
        # not merging: one GoldParse per sentence, defining docs with the words
        # from each sentence
-    else:
+        examples = eg.split_sents()
-        parses = []
+    outputs = []
-        split_examples = eg.split_sents()
+    for eg in examples:
-        for split_example in split_examples:
+        eg_dict = eg.to_dict()
            if not vocab:
                raise ValueError(Errors.E998)
            split_doc = Doc(vocab, words=split_example.token_annotation.words)
        try:
            gp = GoldParse.from_annotation(
-                    split_doc,
+                eg.predicted,
-                    d,
+                eg_dict["doc_annotation"],
-                    split_example.token_annotation,
+                eg_dict["token_annotation"],
-                    make_projective=make_projective,
+                make_projective=make_projective
            )
        except AlignmentError:
            if ignore_misaligned:
                gp = None
            else:
                raise
-            if gp is not None:
+        outputs.append((eg.predicted, gp))
-                parses.append((split_doc, gp))
+    return outputs
        return parses
 cdef class GoldParse: