mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	Suggest approach for GoldParse
This commit is contained in:
		
							parent
							
								
									b078b05ecd
								
							
						
					
					
						commit
						5564314d32
					
				| 
						 | 
					@ -76,10 +76,15 @@ cdef class NewExample:
 | 
				
			||||||
        raise NotImplementedError
 | 
					        raise NotImplementedError
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def to_dict(self):
 | 
					    def to_dict(self):
 | 
				
			||||||
        """ Note that this method does NOT export the doc, only the annotations ! """
 | 
					        # We should probably implement this? We could return the 
 | 
				
			||||||
        token_dict = self._token_annotation
 | 
					        # doc_annotation and token_annotation, and this would allow us to
 | 
				
			||||||
        doc_dict = self._doc_annotation
 | 
					        # easily implement the `get_parses_from_example` in
 | 
				
			||||||
        return {"token_annotation": token_dict, "doc_annotation": doc_dict}
 | 
					        # spacy.syntax.gold_parse
 | 
				
			||||||
 | 
					        raise NotImplementedError
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    def split_sents(self):
 | 
				
			||||||
 | 
					        # Unclear whether we should really implement this. I guess?
 | 
				
			||||||
 | 
					        raise NotImplementedError
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def text(self):
 | 
					    def text(self):
 | 
				
			||||||
        return self.x.text
 | 
					        return self.x.text
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -25,54 +25,34 @@ def is_punct_label(label):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def get_parses_from_example(
 | 
					def get_parses_from_example(
 | 
				
			||||||
    eg, merge=True, vocab=None, make_projective=True, ignore_misaligned=False
 | 
					    example, merge=True, vocab=None, make_projective=True, ignore_misaligned=False
 | 
				
			||||||
):
 | 
					):
 | 
				
			||||||
    """Return a list of (doc, GoldParse) objects.
 | 
					    """Return a list of (doc, GoldParse) objects.
 | 
				
			||||||
    If merge is set to True, keep all Token annotations as one big list."""
 | 
					    If merge is set to True, keep all Token annotations as one big list."""
 | 
				
			||||||
    d = eg.doc_annotation
 | 
					 | 
				
			||||||
    # merge == do not modify Example
 | 
					    # merge == do not modify Example
 | 
				
			||||||
    if merge:
 | 
					    if merge:
 | 
				
			||||||
        t = eg.token_annotation
 | 
					        examples = [example]
 | 
				
			||||||
        doc = eg.doc
 | 
					    else:
 | 
				
			||||||
        if doc is None or not isinstance(doc, Doc):
 | 
					        # not merging: one GoldParse per sentence, defining docs with the words
 | 
				
			||||||
            if not vocab:
 | 
					        # from each sentence
 | 
				
			||||||
                raise ValueError(Errors.E998)
 | 
					        examples = eg.split_sents()
 | 
				
			||||||
            doc = Doc(vocab, words=t.words)
 | 
					    outputs = []
 | 
				
			||||||
 | 
					    for eg in examples:
 | 
				
			||||||
 | 
					        eg_dict = eg.to_dict()
 | 
				
			||||||
        try:
 | 
					        try:
 | 
				
			||||||
            gp = GoldParse.from_annotation(
 | 
					            gp = GoldParse.from_annotation(
 | 
				
			||||||
                doc, d, t, make_projective=make_projective
 | 
					                eg.predicted,
 | 
				
			||||||
 | 
					                eg_dict["doc_annotation"],
 | 
				
			||||||
 | 
					                eg_dict["token_annotation"],
 | 
				
			||||||
 | 
					                make_projective=make_projective
 | 
				
			||||||
            )
 | 
					            )
 | 
				
			||||||
        except AlignmentError:
 | 
					        except AlignmentError:
 | 
				
			||||||
            if ignore_misaligned:
 | 
					            if ignore_misaligned:
 | 
				
			||||||
                gp = None
 | 
					                gp = None
 | 
				
			||||||
            else:
 | 
					            else:
 | 
				
			||||||
                raise
 | 
					                raise
 | 
				
			||||||
        return [(doc, gp)]
 | 
					        outputs.append((eg.predicted, gp))
 | 
				
			||||||
    # not merging: one GoldParse per sentence, defining docs with the words
 | 
					    return outputs
 | 
				
			||||||
    # from each sentence
 | 
					 | 
				
			||||||
    else:
 | 
					 | 
				
			||||||
        parses = []
 | 
					 | 
				
			||||||
        split_examples = eg.split_sents()
 | 
					 | 
				
			||||||
        for split_example in split_examples:
 | 
					 | 
				
			||||||
            if not vocab:
 | 
					 | 
				
			||||||
                raise ValueError(Errors.E998)
 | 
					 | 
				
			||||||
            split_doc = Doc(vocab, words=split_example.token_annotation.words)
 | 
					 | 
				
			||||||
            try:
 | 
					 | 
				
			||||||
                gp = GoldParse.from_annotation(
 | 
					 | 
				
			||||||
                    split_doc,
 | 
					 | 
				
			||||||
                    d,
 | 
					 | 
				
			||||||
                    split_example.token_annotation,
 | 
					 | 
				
			||||||
                    make_projective=make_projective,
 | 
					 | 
				
			||||||
                )
 | 
					 | 
				
			||||||
            except AlignmentError:
 | 
					 | 
				
			||||||
                if ignore_misaligned:
 | 
					 | 
				
			||||||
                    gp = None
 | 
					 | 
				
			||||||
                else:
 | 
					 | 
				
			||||||
                    raise
 | 
					 | 
				
			||||||
            if gp is not None:
 | 
					 | 
				
			||||||
                parses.append((split_doc, gp))
 | 
					 | 
				
			||||||
        return parses
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
cdef class GoldParse:
 | 
					cdef class GoldParse:
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user