mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	* Possessive test passing
This commit is contained in:
		
							parent
							
								
									556f6a18ca
								
							
						
					
					
						commit
						e98e97d483
					
				
							
								
								
									
										17
									
								
								spacy/en.pyx
									
									
									
									
									
								
							
							
						
						
									
										17
									
								
								spacy/en.pyx
									
									
									
									
									
								
							| 
						 | 
					@ -155,11 +155,22 @@ cdef Lexeme* _init_lexeme(unicode string, StringHash hashed,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
cdef size_t _find_split(unicode word, size_t length):
 | 
					cdef size_t _find_split(unicode word, size_t length):
 | 
				
			||||||
    cdef size_t i = 0
 | 
					    cdef size_t i = 0
 | 
				
			||||||
    if word[0].isalnum():
 | 
					    if not is_punct(word, 0, length):
 | 
				
			||||||
        while i < length and word[i].isalnum():
 | 
					        while i < length and not is_punct(word, i, length):
 | 
				
			||||||
            i += 1
 | 
					            i += 1
 | 
				
			||||||
    else:
 | 
					    else:
 | 
				
			||||||
        # Split off a punctuation character, or a sequence of the same punctuation character
 | 
					        # Split off a punctuation character, or a sequence of the same punctuation character
 | 
				
			||||||
        while i < length and not word[i].isalnum() and (i == 0 or word[i-1] == word[i]):
 | 
					        while i < length and is_punct(word, i, length) and (i == 0 or word[i-1] == word[i]):
 | 
				
			||||||
            i += 1
 | 
					            i += 1
 | 
				
			||||||
    return i
 | 
					    return i
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					cdef bint is_punct(unicode word, size_t i, size_t length):
 | 
				
			||||||
 | 
					    if word[i] == "'":
 | 
				
			||||||
 | 
					        if i >= (length - 1):
 | 
				
			||||||
 | 
					            return True
 | 
				
			||||||
 | 
					        elif word[i + 1] == 's' and i != 0:
 | 
				
			||||||
 | 
					            return True
 | 
				
			||||||
 | 
					        else:
 | 
				
			||||||
 | 
					            return False
 | 
				
			||||||
 | 
					    else:
 | 
				
			||||||
 | 
					        return not word[i].isalnum()
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user