mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 09:57:26 +03:00 
			
		
		
		
	* Fix whitespace attachment thing
This commit is contained in:
		
							parent
							
								
									37919eac82
								
							
						
					
					
						commit
						329ae57520
					
				| 
						 | 
					@ -384,13 +384,16 @@ cdef class ArcEager(TransitionSystem):
 | 
				
			||||||
        for i in range(st.length):
 | 
					        for i in range(st.length):
 | 
				
			||||||
            # Always attach spaces to the previous word
 | 
					            # Always attach spaces to the previous word
 | 
				
			||||||
            if Lexeme.c_check_flag(st._sent[i].lex, IS_SPACE):
 | 
					            if Lexeme.c_check_flag(st._sent[i].lex, IS_SPACE):
 | 
				
			||||||
                st._sent[i].head = -1 if (i >= 1) else 1
 | 
					 | 
				
			||||||
                if st._sent[i].sent_start and st._sent[i].head == -1:
 | 
					                if st._sent[i].sent_start and st._sent[i].head == -1:
 | 
				
			||||||
                    st._sent[i].sent_start = False
 | 
					                    st._sent[i].sent_start = False
 | 
				
			||||||
                    # If we had this space token as the start of a sentence,
 | 
					                    # If we had this space token as the start of a sentence,
 | 
				
			||||||
                    # move that sentence start forward one
 | 
					                    # move that sentence start forward one
 | 
				
			||||||
                    if (i + 1) < st.length and not st._sent[i+1].sent_start:
 | 
					                    if (i + 1) < st.length and not st._sent[i+1].sent_start:
 | 
				
			||||||
                        st._sent[i+1].sent_start = True
 | 
					                        st._sent[i+1].sent_start = True
 | 
				
			||||||
 | 
					                    if i >= 1:
 | 
				
			||||||
 | 
					                        st.add_arc(i-1, i, st._sent[i].dep)
 | 
				
			||||||
 | 
					                    else:
 | 
				
			||||||
 | 
					                        st.add_arc(i+1, i, st._sent[i].dep)
 | 
				
			||||||
            elif st._sent[i].head == 0 and st._sent[i].dep == 0:
 | 
					            elif st._sent[i].head == 0 and st._sent[i].dep == 0:
 | 
				
			||||||
                st._sent[i].dep = self.root_label
 | 
					                st._sent[i].dep = self.root_label
 | 
				
			||||||
            # If we're not using the Break transition, we segment via root-labelled
 | 
					            # If we're not using the Break transition, we segment via root-labelled
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user