From 329ae57520d2f6ea7430fda0f6e8afd3975f23d3 Mon Sep 17 00:00:00 2001
From: Matthew Honnibal
Date: Tue, 13 Oct 2015 09:46:38 +0200
Subject: [PATCH] * Fix whitespace attachment thing

---
 spacy/syntax/arc_eager.pyx | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/spacy/syntax/arc_eager.pyx b/spacy/syntax/arc_eager.pyx
index 561308928..a7ed8874c 100644
--- a/spacy/syntax/arc_eager.pyx
+++ b/spacy/syntax/arc_eager.pyx
@@ -384,13 +384,16 @@ cdef class ArcEager(TransitionSystem):
         for i in range(st.length):
             # Always attach spaces to the previous word
             if Lexeme.c_check_flag(st._sent[i].lex, IS_SPACE):
-                st._sent[i].head = -1 if (i >= 1) else 1
                 if st._sent[i].sent_start and st._sent[i].head == -1:
                     st._sent[i].sent_start = False
                     # If we had this space token as the start of a sentence,
                     # move that sentence start forward one
                     if (i + 1) < st.length and not st._sent[i+1].sent_start:
                         st._sent[i+1].sent_start = True
+                if i >= 1:
+                    st.add_arc(i-1, i, st._sent[i].dep)
+                else:
+                    st.add_arc(i+1, i, st._sent[i].dep)
             elif st._sent[i].head == 0 and st._sent[i].dep == 0:
                 st._sent[i].dep = self.root_label
             # If we're not using the Break transition, we segment via root-labelled