mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-25 13:11:03 +03:00 
			
		
		
		
	* Make suffixes file use full-power regex, so that we can handle periods properly
This commit is contained in:
		
							parent
							
								
									accdbe989b
								
							
						
					
					
						commit
						b962fe73d7
					
				|  | @ -1,13 +1,13 @@ | ||||||
| , | , | ||||||
| " | \" | ||||||
| ) | \) | ||||||
| ] | \] | ||||||
| } | \} | ||||||
| * | \* | ||||||
| ! | \! | ||||||
| ? | \? | ||||||
| % | % | ||||||
| $ | \$ | ||||||
| > | > | ||||||
| : | : | ||||||
| ; | ; | ||||||
|  | @ -16,6 +16,8 @@ $ | ||||||
| '' | '' | ||||||
| 's | 's | ||||||
| 'S | 'S | ||||||
| .. | \.\. | ||||||
| ... | \.\.\. | ||||||
| .... | \.\.\.\. | ||||||
|  | (?<=[a-z0-9])\. | ||||||
|  | (?<=[0-9])km | ||||||
|  |  | ||||||
|  | @ -31,10 +31,7 @@ def read_prefix(data_dir): | ||||||
| def read_suffix(data_dir): | def read_suffix(data_dir): | ||||||
|     with utf8open(path.join(data_dir, 'suffix')) as file_: |     with utf8open(path.join(data_dir, 'suffix')) as file_: | ||||||
|         entries = file_.read().split('\n') |         entries = file_.read().split('\n') | ||||||
|         expression = '|'.join([re.escape(piece) + '$' for piece in entries if piece.strip()]) |         expression = '|'.join([piece + '$' for piece in entries if piece.strip()]) | ||||||
|     # TODO: Fix this hack! |  | ||||||
|     expression += r'|(?<=[a-z0-9])\.$' |  | ||||||
|     expression += r'|(?<=[0-9])km$' |  | ||||||
|     return expression |     return expression | ||||||
| 
 | 
 | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	Block a user