mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-10-25 05:01:02 +03:00 
			
		
		
		
	Update base punctuation
This commit is contained in:
		
							parent
							
								
									9d6c8eaa49
								
							
						
					
					
						commit
						e85e1d571b
					
				|  | @@ -33,7 +33,7 @@ _currency = r'\$ £ € ¥ ฿ US\$ C\$ A\$' | |||
| # These expressions contain various unicode variations, including characters | ||||
| # used in Chinese (see #1333, #1340, #1351) – unless there are cross-language | ||||
| # conflicts, spaCy's base tokenizer should handle all of those by default | ||||
| -_punct = r'… …… , : ; \! \? ¿ ¡ \( \) \[ \] \{ \} < > _ # \* & 。 ? ! , 、 ; : ~ ·' | ||||
| +_punct = r'… …… , : ; \! \? ¿ ¡ \( \) \[ \] \{ \} < > _ # \* & 。 ? ! , 、 ; : ~ · ।' | ||||
| _quotes = r'\' \'\' " ” “ `` ` ‘ ´ ‘‘ ’’ ‚ , „ » « 「 」 『 』 ( ) 〔 〕 【 】 《 》 〈 〉' | ||||
| _hyphens = '- – — -- --- —— ~' | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	Block a user