mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 09:57:26 +03:00 
			
		
		
		
	* Add regular expressions for English multi-word expressions
This commit is contained in:
		
							parent
							
								
									5a075ea3fc
								
							
						
					
					
						commit
						2fee67cfa3
					
				
							
								
								
									
										35
									
								
								spacy/en/regexes.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										35
									
								
								spacy/en/regexes.py
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,35 @@
 | 
				
			||||||
 | 
					import re
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					_mw_prepositions = [
 | 
				
			||||||
 | 
					    'close to',
 | 
				
			||||||
 | 
					    'down by',
 | 
				
			||||||
 | 
					    'on the way to',
 | 
				
			||||||
 | 
					    'on my way',
 | 
				
			||||||
 | 
					    'on his way',
 | 
				
			||||||
 | 
					    'on her way',
 | 
				
			||||||
 | 
					    'on your way',
 | 
				
			||||||
 | 
					    'on our way',
 | 
				
			||||||
 | 
					    'on their way',
 | 
				
			||||||
 | 
					]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					MW_PREPOSITIONS_RE = re.compile('|'.join(_mw_prepositions), flags=re.IGNORECASE)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					TIME_RE = re.compile(
 | 
				
			||||||
 | 
					    '{colon_digits}|{colon_digits} ?{am_pm}?|{one_two_digits} ?({am_pm})'.format(
 | 
				
			||||||
 | 
					        colon_digits=r'[0-2]?[0-9]:[0-5][0-9](?::[0-5][0-9])?',
 | 
				
			||||||
 | 
					        one_two_digits=r'[0-2]?[0-9]',
 | 
				
			||||||
 | 
					        am_pm=r'[ap]\.?m\.?'))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					MONEY_RE = re.compile('\$\d+(?:\.\d+)?|\d+ dollars(?: \d+ cents)?')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					DAYS_RE = re.compile('Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					REGEXES = [('IN', 'O', MW_PREPOSITIONS_RE), ('CD', 'TIME', TIME_RE),
 | 
				
			||||||
 | 
					           ('NNP', 'DATE', DAYS_RE), ('CD', 'MONEY', MONEY_RE)]
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user