ines 
							
						 
					 
					
						
						
						
						
							
						
						
							9baab241b4 
							
						 
					 
					
						
						
							
							Add skeleton language data for Turkish  
						
						 
						
						
						
					 
					
						2017-11-02 16:32:24 +01:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							c6fea3e5f6 
							
						 
					 
					
						
						
							
							Add Romanian and Croatian skeletons (experimental)  
						
						 
						
						... 
						
						
						
						Add language data templates to make it easier for others to contribute to the language support 
						
					 
					
						2017-11-01 23:04:28 +01:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							18c859500b 
							
						 
					 
					
						
						
							
							Add missing imports  
						
						 
						
						
						
					 
					
						2017-11-01 23:02:51 +01:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							819e30a26e 
							
						 
					 
					
						
						
							
							Tidy up tokenizer exceptions  
						
						 
						
						
						
					 
					
						2017-11-01 23:02:45 +01:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							9659391944 
							
						 
					 
					
						
						
							
							Update deprecated methods and add warnings  
						
						 
						
						
						
					 
					
						2017-11-01 16:49:42 +01:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							d11659463b 
							
						 
					 
					
						
						
							
							Merge pull request  #1152  from jimregan/develop-irish  
						
						 
						
						... 
						
						
						
						[WIP] attempt a port from #1147  
						
					 
					
						2017-11-01 00:23:43 +01:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							7e424a1804 
							
						 
					 
					
						
						
							
							Don't copy exception dicts if not necessary and tidy up  
						
						 
						
						
						
					 
					
						2017-10-31 21:05:29 +01:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							06c25a8882 
							
						 
					 
					
						
						
							
							Remove comma that caused list to wrap in tuple!  
						
						 
						
						... 
						
						
						
						Also removed extra dict wrappings for performance (we used to have them in there, but they should only really exist if copying the dict is absolutely necessary) 
						
					 
					
						2017-10-31 20:13:16 +01:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							147448b65b 
							
						 
					 
					
						
						
							
							Add missing symbols  
						
						 
						
						
						
					 
					
						2017-10-31 19:34:45 +01:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							9b0de9fb43 
							
						 
					 
					
						
						
							
							Fix import of symbols (now nested one level lower)  
						
						 
						
						
						
					 
					
						2017-10-31 19:17:58 +01:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Jim O'Regan 
							
						 
					 
					
						
						
						
						
							
						
						
							41dd29e48e 
							
						 
					 
					
						
						
							
							merge  
						
						 
						
						
						
					 
					
						2017-10-31 14:07:45 +00:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							090bd00369 
							
						 
					 
					
						
						
							
							Merge pull request  #1464  from mayukh18/develop_bengali_pronouns  
						
						 
						
						... 
						
						
						
						added the bengali pronouns for v2.0 
						
					 
					
						2017-10-25 21:55:25 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								mayukh18 
							
						 
					 
					
						
						
						
						
							
						
						
							1bc07758fa 
							
						 
					 
					
						
						
							
							added few bengali pronouns  
						
						 
						
						
						
					 
					
						2017-10-25 22:24:40 +05:30  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							d3bf488e16 
							
						 
					 
					
						
						
							
							Merge pull request  #1171  from mollerhoj/support-danish  
						
						 
						
						... 
						
						
						
						Improve basic support for Danish 
						
					 
					
						2017-10-24 20:29:57 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							66766c1454 
							
						 
					 
					
						
						
							
							Restore SP tag to English tag_map, until models migrate  
						
						 
						
						
						
					 
					
						2017-10-24 17:05:00 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							c55db0a4a1 
							
						 
					 
					
						
						
							
							Add example sentences for Japanese and Chinese (see  #1107 )  
						
						 
						
						
						
					 
					
						2017-10-24 13:02:24 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							66f8f9d4a0 
							
						 
					 
					
						
						
							
							Fix Japanese tokenizer  
						
						 
						
						... 
						
						
						
						JapaneseTokenizer now returns a Doc, not individual words 
						
					 
					
						2017-10-24 13:02:19 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							facf77e541 
							
						 
					 
					
						
						
							
							Merge branch 'develop' into support-danish  
						
						 
						
						
						
					 
					
						2017-10-24 11:53:19 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							49895fbef6 
							
						 
					 
					
						
						
							
							Rename 'SP' special tag to '_SP'  
						
						 
						
						... 
						
						
						
						Renaming the tag with an underscore lets us add it to the tag map
without worrying that we'll change the sequence of tags, which throws
off the tag-to-ID mapping. For instance, if we inserted a 'SP' tag,
the "VERB" tag is pushed to a different class ID, and the model is all
messed up. 
						
					 
					
						2017-10-20 14:01:12 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							f0d577e460 
							
						 
					 
					
						
						
							
							Merge pull request  #1425  from explosion/feature/hindi-tokenizer  
						
						 
						
						... 
						
						
						
						💫  Basic Hindi tokenization support 
						
					 
					
						2017-10-18 13:34:52 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							839de87ca9 
							
						 
					 
					
						
						
							
							Make lambda func a named function, for pickling  
						
						 
						
						
						
					 
					
						2017-10-17 18:21:20 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9ce7d6af87 
							
						 
					 
					
						
						
							
							Make lex attr functions top-level functions, to promote pickling  
						
						 
						
						
						
					 
					
						2017-10-17 18:19:18 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							aab299c8ae 
							
						 
					 
					
						
						
							
							Merge pull request  #1429  from vishnunekkanti/develop  
						
						 
						
						... 
						
						
						
						fix syntax error in zh 
						
					 
					
						2017-10-17 14:45:02 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							485c4f6df5 
							
						 
					 
					
						
						
							
							Add Hungarian examples (see  #1107 )  
						
						 
						
						
						
					 
					
						2017-10-17 02:37:45 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Vishnu Kumar Nekkanti 
							
						 
					 
					
						
						
						
						
							
						
						
							d3c54cf39a 
							
						 
					 
					
						
						
							
							fixed SyntaxError while checking for jieba  
						
						 
						
						
						
					 
					
						2017-10-16 18:51:33 +05:30  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							266e7180a7 
							
						 
					 
					
						
						
							
							Add Language class, stop words and basic stemmer that sets NORM  
						
						 
						
						
						
					 
					
						2017-10-14 14:59:52 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							e85e1d571b 
							
						 
					 
					
						
						
							
							Update base punctuation  
						
						 
						
						
						
					 
					
						2017-10-14 14:59:23 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							9d6c8eaa49 
							
						 
					 
					
						
						
							
							Update base norm exceptions with more unicode characters  
						
						 
						
						... 
						
						
						
						e.g. unicode variations of punctuation used in Chinese 
						
					 
					
						2017-10-14 14:58:52 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							38c756fd85 
							
						 
					 
					
						
						
							
							Port over changes from  #1287  
						
						 
						
						
						
					 
					
						2017-10-14 13:16:21 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							612224c10d 
							
						 
					 
					
						
						
							
							Port over changes from  #1157  
						
						 
						
						
						
					 
					
						2017-10-14 13:11:39 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							a4d974d97b 
							
						 
					 
					
						
						
							
							Port over URL pattern changes from  #1411  
						
						 
						
						
						
					 
					
						2017-10-14 12:58:07 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							09aed58140 
							
						 
					 
					
						
						
							
							Port over changes from  #1333  and add comments  
						
						 
						
						
						
					 
					
						2017-10-14 12:52:59 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							8ce6f96180 
							
						 
					 
					
						
						
							
							Don't make copies of language data components  
						
						 
						
						
						
					 
					
						2017-10-11 15:34:55 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							417d45f5d0 
							
						 
					 
					
						
						
							
							Add lemmatizer data as variable on language data  
						
						 
						
						... 
						
						
						
						Don't create lookup lemmatizer within Language class and just pass in
the data so it can be set on Token creation 
						
					 
					
						2017-10-11 02:24:58 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							0c2343d73a 
							
						 
					 
					
						
						
							
							Tidy up language data  
						
						 
						
						
						
					 
					
						2017-10-11 02:22:49 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8143618497 
							
						 
					 
					
						
						
							
							Set prefix length back to 1  
						
						 
						
						
						
					 
					
						2017-10-10 19:32:54 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							dce8afb9cf 
							
						 
					 
					
						
						
							
							Set prefix length to 3  
						
						 
						
						
						
					 
					
						2017-10-09 21:55:55 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							959c46eabe 
							
						 
					 
					
						
						
							
							Merge pull request  #1365  from wannaphongcom/develop  
						
						 
						
						... 
						
						
						
						Add Thai language for spaCy v2 
						
					 
					
						2017-09-26 23:43:05 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Wannaphong Phatthiyaphaibun 
							
						 
					 
					
						
						
						
						
							
						
						
							3d5046c499 
							
						 
					 
					
						
						
							
							fix import in th  
						
						 
						
						
						
					 
					
						2017-09-26 22:41:20 +07:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Wannaphong Phatthiyaphaibun 
							
						 
					 
					
						
						
						
						
							
						
						
							a63f790b8c 
							
						 
					 
					
						
						
							
							fix thai tag_map  
						
						 
						
						
						
					 
					
						2017-09-26 22:28:57 +07:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Wannaphong Phatthiyaphaibun 
							
						 
					 
					
						
						
						
						
							
						
						
							2ea27d07f4 
							
						 
					 
					
						
						
							
							fix tokenizer_exceptions in thai  
						
						 
						
						
						
					 
					
						2017-09-26 22:14:47 +07:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Wannaphong Phatthiyaphaibun 
							
						 
					 
					
						
						
						
						
							
						
						
							a2bf4cc7bf 
							
						 
					 
					
						
						
							
							fix newline in file  
						
						 
						
						
						
					 
					
						2017-09-26 21:49:43 +07:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							bb5c631402 
							
						 
					 
					
						
						
							
							Implement like_num getter for French (via  #1161 )  
						
						 
						
						
						
					 
					
						2017-09-26 16:47:45 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							15479b3bae 
							
						 
					 
					
						
						
							
							Add comment to like_num re: future work  
						
						 
						
						
						
					 
					
						2017-09-26 16:43:28 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							adda08fe14 
							
						 
					 
					
						
						
							
							Implement like_num getter for Dutch (via  #1177 )  
						
						 
						
						
						
					 
					
						2017-09-26 16:39:15 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							5ee10379db 
							
						 
					 
					
						
						
							
							Port over changes from  #1340  
						
						 
						
						
						
					 
					
						2017-09-26 16:38:08 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Wannaphong Phatthiyaphaibun 
							
						 
					 
					
						
						
						
						
							
						
						
							5cba67146c 
							
						 
					 
					
						
						
							
							add thai in spacy2  
						
						 
						
						
						
					 
					
						2017-09-26 21:36:27 +07:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							10d291f129 
							
						 
					 
					
						
						
							
							Port over change from  #1351  
						
						 
						
						
						
					 
					
						2017-09-26 16:11:41 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							ece30c28a8 
							
						 
					 
					
						
						
							
							Don't split hyphenated words in German  
						
						 
						
						... 
						
						
						
						This way, the tokenizer matches the tokenization in German treebanks 
						
					 
					
						2017-09-16 20:40:15 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							bd3da3d6fb 
							
						 
					 
					
						
						
							
							Port over change from  #1323  and tidy up  
						
						 
						
						
						
					 
					
						2017-09-14 19:23:13 +02:00