From 827fb51e6ccd469f683cfb66bb8c71a74bdaeefb Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Mon, 25 Jan 2021 00:52:00 +1100 Subject: [PATCH] Fix set_annotations during Parser.update --- spacy/pipeline/transition_parser.pyx | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/spacy/pipeline/transition_parser.pyx b/spacy/pipeline/transition_parser.pyx index 8cb4ea15d..15b07e9b1 100644 --- a/spacy/pipeline/transition_parser.pyx +++ b/spacy/pipeline/transition_parser.pyx @@ -370,7 +370,11 @@ cdef class Parser(TrainablePipe): if sgd not in (None, False): self.finish_update(sgd) docs = [eg.predicted for eg in examples] - self.set_annotations(docs, all_states) + # TODO: Refactor so we don't have to parse twice like this (ugh) + # The issue is that we cut up the gold batch into sub-states, and that + # makes it hard to get the actual predicted transition sequence. + predicted_states = self.predict(docs) + self.set_annotations(docs, predicted_states) # Ugh, this is annoying. If we're working on GPU, we want to free the # memory ASAP. It seems that Python doesn't necessarily get around to # removing these in time if we don't explicitly delete? It's confusing.