Gyorgy Orosz 
							
						 
					 
					
						
						
						
						
							
						
						
							8c0b4b850e 
							
						 
					 
					
						
						
							
							Fixed emoji handling for Hungarian  
						
						
						
					 
					
						2017-05-30 21:34:46 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							a8e58e04ef 
							
						 
					 
					
						
						
							
							Add symbols class to punctuation rules to handle emoji (see  #1088 )  
						
						... 
						
						
						
						Currently doesn't work for Hungarian, because of conflicts with the
custom punctuation rules. Also doesn't take multi-character emoji like
👩🏽💻  into account. 
						
					 
					
						2017-05-27 17:57:10 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							0084466a66 
							
						 
					 
					
						
						
							
							Remove unused utf8open util and replace os.path with ensure_path  
						
						
						
					 
					
						2017-04-16 20:37:45 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							444dd511c5 
							
						 
					 
					
						
						
							
							Fix xpassing URL test case  
						
						
						
					 
					
						2017-04-07 17:36:05 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							10e29189ac 
							
						 
					 
					
						
						
							
							Adjust URL testcases and xfail problems (instead of comment)  
						
						
						
					 
					
						2017-03-10 14:22:50 +01:00 
						 
				 
			
				
					
						
							
							
								Dan Rapp 
							
						 
					 
					
						
						
						
						
							
						
						
							123d3f2d38 
							
						 
					 
					
						
						
							
							Fix error in test case parameterization  
						
						
						
					 
					
						2017-03-09 12:18:21 -07:00 
						 
				 
			
				
					
						
							
							
								Dan Rapp 
							
						 
					 
					
						
						
						
						
							
						
						
							b9307dfcd7 
							
						 
					 
					
						
						
							
							Merge branch 'master' into rappdw/tokenizer_exceptions_url_fix  
						
						
						
					 
					
						2017-03-09 11:42:14 -07:00 
						 
				 
			
				
					
						
							
							
								Dan Rapp 
							
						 
					 
					
						
						
						
						
							
						
						
							3b1df3808d 
							
						 
					 
					
						
						
							
							Issue  #840  - URL pattern too broad  
						
						
						
					 
					
						2017-03-09 11:39:39 -07:00 
						 
				 
			
				
					
						
							
							
								Aniruddha Adhikary 
							
						 
					 
					
						
						
						
						
							
						
						
							696215a3fb 
							
						 
					 
					
						
						
							
							add tests for Bengali  
						
						
						
					 
					
						2017-03-05 11:25:12 +06:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							138c53ff2e 
							
						 
					 
					
						
						
							
							Merge tokenizer tests  
						
						
						
					 
					
						2017-01-13 01:34:14 +01:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							33e5f8dc2e 
							
						 
					 
					
						
						
							
							Create basic and extended test set for URLs  
						
						
						
					 
					
						2017-01-12 23:40:02 +01:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							ae7edd30e7 
							
						 
					 
					
						
						
							
							Move text file back to tokenizer tests directory  
						
						
						
					 
					
						2017-01-12 02:10:23 +01:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							c682b8ca90 
							
						 
					 
					
						
						
							
							Merge conftests into one cohesive file  
						
						
						
					 
					
						2017-01-11 13:56:32 +01:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							869963c3c4 
							
						 
					 
					
						
						
							
							Mark extensive prefix/suffix tests as slow  
						
						
						
					 
					
						2017-01-10 15:57:35 +01:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							487e020ebe 
							
						 
					 
					
						
						
							
							Add simple test for surrounding brackets  
						
						
						
					 
					
						2017-01-10 15:57:26 +01:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							0ba5cf51d2 
							
						 
					 
					
						
						
							
							Assert length first  
						
						
						
					 
					
						2017-01-10 15:57:00 +01:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							2185d31907 
							
						 
					 
					
						
						
							
							Adjust names and formatting  
						
						
						
					 
					
						2017-01-10 15:56:35 +01:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							e10d4ca964 
							
						 
					 
					
						
						
							
							Remove semi-redundant URLs and punctuation for faster testing  
						
						
						
					 
					
						2017-01-10 15:54:25 +01:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							3a3cb2c90c 
							
						 
					 
					
						
						
							
							Add unicode declaration  
						
						
						
					 
					
						2017-01-10 15:53:15 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							42cd598f57 
							
						 
					 
					
						
						
							
							Use correct fixtures in URL tokenizer  
						
						
						
					 
					
						2017-01-09 14:10:40 +01:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							aa876884f0 
							
						 
					 
					
						
						
							
							Revert "Revert "Merge remote-tracking branch 'origin/master'""  
						
						... 
						
						
						
						This reverts commit fb9d3bb022 
						
					 
					
						2017-01-09 13:28:13 +01:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							abb09782f9 
							
						 
					 
					
						
						
							
							Move sun.txt to original location and fix path to not break parser tests  
						
						
						
					 
					
						2017-01-08 20:32:54 +01:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							bbe7cab3a1 
							
						 
					 
					
						
						
							
							Move non-English-specific tests back to general tokenizer tests  
						
						
						
					 
					
						2017-01-05 18:09:29 +01:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							637f785036 
							
						 
					 
					
						
						
							
							Add general sanity tests for all tokenizers  
						
						
						
					 
					
						2017-01-05 16:25:38 +01:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							c5f2dc15de 
							
						 
					 
					
						
						
							
							Move English tokenizer tests to directory /en  
						
						
						
					 
					
						2017-01-05 16:25:04 +01:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							8b45363b4d 
							
						 
					 
					
						
						
							
							Modernize and merge general tokenizer tests  
						
						
						
					 
					
						2017-01-05 13:17:05 +01:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							02cfda48c9 
							
						 
					 
					
						
						
							
							Modernize and merge tokenizer tests for string loading  
						
						
						
					 
					
						2017-01-05 13:16:55 +01:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							a11f684822 
							
						 
					 
					
						
						
							
							Modernize and merge tokenizer tests for whitespace  
						
						
						
					 
					
						2017-01-05 13:16:33 +01:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							8b284fc6f1 
							
						 
					 
					
						
						
							
							Modernize and merge tokenizer tests for text from file  
						
						
						
					 
					
						2017-01-05 13:15:52 +01:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							2c2e878653 
							
						 
					 
					
						
						
							
							Modernize and merge tokenizer tests for punctuation  
						
						
						
					 
					
						2017-01-05 13:14:16 +01:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							8a74129cdf 
							
						 
					 
					
						
						
							
							Modernize and merge tokenizer tests for prefixes/suffixes/infixes  
						
						
						
					 
					
						2017-01-05 13:13:12 +01:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							0e65dca9a5 
							
						 
					 
					
						
						
							
							Modernize and merge tokenizer tests for exception and emoticons  
						
						
						
					 
					
						2017-01-05 13:11:31 +01:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							34c47bb20d 
							
						 
					 
					
						
						
							
							Fix formatting  
						
						
						
					 
					
						2017-01-05 13:10:51 +01:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							2e72683baa 
							
						 
					 
					
						
						
							
							Add missing docstrings  
						
						
						
					 
					
						2017-01-05 13:10:21 +01:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							da10a049a6 
							
						 
					 
					
						
						
							
							Add unicode declarations  
						
						
						
					 
					
						2017-01-05 13:09:48 +01:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							8279993a6f 
							
						 
					 
					
						
						
							
							Modernize and merge tokenizer tests for punctuation  
						
						
						
					 
					
						2017-01-04 00:49:20 +01:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							550630df73 
							
						 
					 
					
						
						
							
							Update tokenizer tests for contractions  
						
						
						
					 
					
						2017-01-04 00:48:42 +01:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							109f202e8f 
							
						 
					 
					
						
						
							
							Update conftest fixture  
						
						
						
					 
					
						2017-01-04 00:48:21 +01:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							ee6b49b293 
							
						 
					 
					
						
						
							
							Modernize tokenizer tests for emoticons  
						
						
						
					 
					
						2017-01-04 00:47:59 +01:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							f09b5a5dfd 
							
						 
					 
					
						
						
							
							Modernize tokenizer tests for infixes  
						
						
						
					 
					
						2017-01-04 00:47:42 +01:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							59059fed27 
							
						 
					 
					
						
						
							
							Move regression test for  #351  to own file  
						
						
						
					 
					
						2017-01-04 00:47:11 +01:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							667051375d 
							
						 
					 
					
						
						
							
							Modernize tokenizer tests for whitespace  
						
						
						
					 
					
						2017-01-04 00:46:35 +01:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							aafc894285 
							
						 
					 
					
						
						
							
							Modernize tokenizer tests for contractions  
						
						... 
						
						
						
						Use @pytest.mark.parametrize. 
						
					 
					
						2017-01-03 23:02:21 +01:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							fb9d3bb022 
							
						 
					 
					
						
						
							
							Revert "Merge remote-tracking branch 'origin/master'"  
						
						... 
						
						
						
						This reverts commit d3b181cdf1b19cfcc144 
						
					 
					
						2017-01-03 18:21:36 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3ba7c167a8 
							
						 
					 
					
						
						
							
							Fix URL tests  
						
						
						
					 
					
						2016-12-30 17:10:08 -06:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3e8d9c772e 
							
						 
					 
					
						
						
							
							Test interaction of token_match and punctuation  
						
						... 
						
						
						
						Check that the new token_match function applies after punctuation is split off. 
						
					 
					
						2016-12-31 00:52:17 +11:00 
						 
				 
			
				
					
						
							
							
								Gyorgy Orosz 
							
						 
					 
					
						
						
						
						
							
						
						
							1748549aeb 
							
						 
					 
					
						
						
							
							Added exception pattern mechanism to the tokenizer.  
						
						
						
					 
					
						2016-12-21 23:16:19 +01:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							d1c1d3f9cd 
							
						 
					 
					
						
						
							
							Fix tokenizer test  
						
						
						
					 
					
						2016-12-18 16:55:32 +01:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							07f0efb102 
							
						 
					 
					
						
						
							
							Add test for tokenizer regular expressions  
						
						
						
					 
					
						2016-12-07 20:33:28 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b6b01d4680 
							
						 
					 
					
						
						
							
							Remove deprecated tokens_from_list test.  
						
						
						
					 
					
						2016-11-02 23:47:21 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							cc8bf62208 
							
						 
					 
					
						
						
							
							* Fix Issue  #360 : Tokenizer failed when the infix regex matched the start of the string while trying to tokenize multi-infix tokens.  
						
						
						
					 
					
						2016-05-09 13:23:47 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b4bfc6ae55 
							
						 
					 
					
						
						
							
							* Add test for Issue  #351 : Indices off when leading whitespace  
						
						
						
					 
					
						2016-05-04 15:53:17 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6f82065761 
							
						 
					 
					
						
						
							
							* Fix infixed commas in tokenizer, re Issue  #326 . Need to benchmark on empirical data, to make sure this doesn't break other cases.  
						
						
						
					 
					
						2016-04-14 11:36:03 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							04d0209be9 
							
						 
					 
					
						
						
							
							* Recognise multiple infixes in a token.  
						
						
						
					 
					
						2016-04-13 18:38:26 +10:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b1fe41b45d 
							
						 
					 
					
						
						
							
							* Extend infix test, commenting on limitation of tokenizer w.r.t. infixes at the moment.  
						
						
						
					 
					
						2016-03-29 14:31:05 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9c73983bdd 
							
						 
					 
					
						
						
							
							* Add test for hyphenation problem in Issue  #302  
						
						
						
					 
					
						2016-03-29 14:27:13 +11:00 
						 
				 
			
				
					
						
							
							
								Henning Peters 
							
						 
					 
					
						
						
						
						
							
						
						
							c12d3dd200 
							
						 
					 
					
						
						
							
							add __init__.py to empty package dirs  
						
						
						
					 
					
						2016-03-14 11:28:03 +01:00 
						 
				 
			
				
					
						
							
							
								Henning Peters 
							
						 
					 
					
						
						
						
						
							
						
						
							9d8966a2c0 
							
						 
					 
					
						
						
							
							Update test_tokenizer.py  
						
						
						
					 
					
						2016-02-10 19:24:37 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							7f24229f10 
							
						 
					 
					
						
						
							
							* Don't try to pickle the tokenizer  
						
						
						
					 
					
						2016-02-06 14:09:05 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							515493c675 
							
						 
					 
					
						
						
							
							* Add xfail test for Issue  #225 : tokenization with non-whitespace delimiters  
						
						
						
					 
					
						2016-01-19 13:20:14 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							223d2b3484 
							
						 
					 
					
						
						
							
							* Add test for Issue  #154 : Additional whitespace introduced when string ends with a whitespace token.  
						
						
						
					 
					
						2016-01-16 17:08:07 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3fbfba575a 
							
						 
					 
					
						
						
							
							* xfail the contractions test  
						
						
						
					 
					
						2015-12-31 13:16:28 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4b4eec8b47 
							
						 
					 
					
						
						
							
							* Fix Issue  #201 : Tokenization of there'll  
						
						
						
					 
					
						2015-12-29 18:09:09 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4e16f9e435 
							
						 
					 
					
						
						
							
							* Move tests underneath spacy/  
						
						
						
					 
					
						2015-10-26 00:07:31 +11:00