Add test for #1488 (passes on v2.0.0a18?)

parent 711278b667
commit f0986df94b
spacy/tests/regression/test_issue1488.py (new file, 26 lines added)
# coding: utf8
from __future__ import unicode_literals

import regex as re

from ...lang.en import English
from ...tokenizer import Tokenizer


def test_issue1488():
    # Custom boundary rules: strip brackets and quotes as prefixes/suffixes,
    # split on hyphens, tildes and periods as infixes, and keep anything
    # starting with http:// or https:// intact via token_match.
    prefix_re = re.compile(r'''[\[\("']''')
    suffix_re = re.compile(r'''[\]\)"']''')
    infix_re = re.compile(r'''[-~\.]''')
    simple_url_re = re.compile(r'''^https?://''')

    def my_tokenizer(nlp):
        return Tokenizer(nlp.vocab, {},
                         prefix_search=prefix_re.search,
                         suffix_search=suffix_re.search,
                         infix_finditer=infix_re.finditer,
                         token_match=simple_url_re.match)

    nlp = English()
    nlp.tokenizer = my_tokenizer(nlp)
    doc = nlp("This is a test.")
    # The regression is exercised by tokenizing itself: the test passes
    # as long as iteration completes without an error.
    for token in doc:
        print(token.text)