From 5564314d323f746a180a81888e76166a3687ff11 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal <honnibal+gh@gmail.com>
Date: Sat, 13 Jun 2020 15:43:35 +0200
Subject: [PATCH] Suggest approach for GoldParse

---
 spacy/gold/new_example.pyx  | 13 +++++++---
 spacy/syntax/gold_parse.pyx | 50 +++++++++++--------------------------
 2 files changed, 24 insertions(+), 39 deletions(-)

diff --git a/spacy/gold/new_example.pyx b/spacy/gold/new_example.pyx
index eb796eb83..d9a712e38 100644
--- a/spacy/gold/new_example.pyx
+++ b/spacy/gold/new_example.pyx
@@ -76,10 +76,15 @@ cdef class NewExample:
         raise NotImplementedError
 
     def to_dict(self):
-        """ Note that this method does NOT export the doc, only the annotations ! """
-        token_dict = self._token_annotation
-        doc_dict = self._doc_annotation
-        return {"token_annotation": token_dict, "doc_annotation": doc_dict}
+        # TODO: We should probably implement this. Returning the
+        # doc_annotation and token_annotation here would allow us to
+        # easily implement `get_parses_from_example` in
+        # spacy.syntax.gold_parse.
+        raise NotImplementedError
+
+    def split_sents(self):
+        # Unclear whether we should really implement this. I guess?
+        raise NotImplementedError
 
     def text(self):
         return self.x.text
diff --git a/spacy/syntax/gold_parse.pyx b/spacy/syntax/gold_parse.pyx
index 05361fd82..9712f6e94 100644
--- a/spacy/syntax/gold_parse.pyx
+++ b/spacy/syntax/gold_parse.pyx
@@ -25,54 +25,34 @@ def is_punct_label(label):
 
 
 def get_parses_from_example(
-    eg, merge=True, vocab=None, make_projective=True, ignore_misaligned=False
+    example, merge=True, vocab=None, make_projective=True, ignore_misaligned=False
 ):
     """Return a list of (doc, GoldParse) objects.
     If merge is set to True, keep all Token annotations as one big list."""
-    d = eg.doc_annotation
     # merge == do not modify Example
     if merge:
-        t = eg.token_annotation
-        doc = eg.doc
-        if doc is None or not isinstance(doc, Doc):
-            if not vocab:
-                raise ValueError(Errors.E998)
-            doc = Doc(vocab, words=t.words)
+        examples = [example]
+    else:
+        # not merging: one GoldParse per sentence, defining docs with the words
+        # from each sentence
+        examples = example.split_sents()
+    outputs = []
+    for eg in examples:
+        eg_dict = eg.to_dict()
         try:
             gp = GoldParse.from_annotation(
-                doc, d, t, make_projective=make_projective
+                eg.predicted,
+                eg_dict["doc_annotation"],
+                eg_dict["token_annotation"],
+                make_projective=make_projective
             )
         except AlignmentError:
             if ignore_misaligned:
                 gp = None
             else:
                 raise
-        return [(doc, gp)]
-    # not merging: one GoldParse per sentence, defining docs with the words
-    # from each sentence
-    else:
-        parses = []
-        split_examples = eg.split_sents()
-        for split_example in split_examples:
-            if not vocab:
-                raise ValueError(Errors.E998)
-            split_doc = Doc(vocab, words=split_example.token_annotation.words)
-            try:
-                gp = GoldParse.from_annotation(
-                    split_doc,
-                    d,
-                    split_example.token_annotation,
-                    make_projective=make_projective,
-                )
-            except AlignmentError:
-                if ignore_misaligned:
-                    gp = None
-                else:
-                    raise
-            if gp is not None:
-                parses.append((split_doc, gp))
-        return parses
-
+        outputs.append((eg.predicted, gp))
+    return outputs
 
 
 cdef class GoldParse: