Matthew Honnibal 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							eb3040ce46 
							
						 
					 
					
						
						
							
							Merge pull request  #1891  from fucking-signup/master  
						
						... 
						
						
						
						Fix issue #1889  
						
					 
					
						2018-02-18 13:47:47 +01:00 
						 
				 
			
				
					
						
							
							
								4altinok 
							
						 
					 
					
						
						
						
						
							
						
						
							94fb0b75e3 
							
						 
					 
					
						
						
							
							code for is_currency  
						
						
						
					 
					
						2018-02-11 18:51:32 +01:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							0954e15dda 
							
						 
					 
					
						
						
							
							Merge pull request  #1913  from ohenrik/nb_syntax_iterator  
						
						... 
						
						
						
						Norwegian Language (nb) - Added french syntax iterator with explanation 
						
					 
					
						2018-02-06 04:59:07 +01:00 
						 
				 
			
				
					
						
							
							
								Ole Henrik Skogstrøm 
							
						 
					 
					
						
						
						
						
							
						
						
							251a7805fe 
							
						 
					 
					
						
						
							
							Copied French syntax iterator to simplify future changes  
						
						
						
					 
					
						2018-02-05 14:45:05 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							f1d3deffac 
							
						 
					 
					
						
						
							
							Add Russian example sentences (see  #1107 )  
						
						
						
					 
					
						2018-02-01 20:09:40 +01:00 
						 
				 
			
				
					
						
							
							
								Ole Henrik Skogstrøm 
							
						 
					 
					
						
						
						
						
							
						
						
							e40465487c 
							
						 
					 
					
						
						
							
							Added french syntax iterator with explenation  
						
						
						
					 
					
						2018-01-30 15:44:29 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							cb7110c22e 
							
						 
					 
					
						
						
							
							Merge pull request  #1882  from ohenrik/nb_lemma_and_tag_map  
						
						... 
						
						
						
						Add norwegian bokmål ('nb') lemmatizer and tag_map 
						
					 
					
						2018-01-29 18:18:50 +01:00 
						 
				 
			
				
					
						
							
							
								Ali Zarezade 
							
						 
					 
					
						
						
						
						
							
						
						
							bb6bd3d8ae 
							
						 
					 
					
						
						
							
							add persian language  
						
						
						
					 
					
						2018-01-27 13:27:26 +03:30 
						 
				 
			
				
					
						
							
							
								Ali Zarezade 
							
						 
					 
					
						
						
						
						
							
						
						
							d195675db5 
							
						 
					 
					
						
						
							
							add persian language  
						
						
						
					 
					
						2018-01-27 13:21:38 +03:30 
						 
				 
			
				
					
						
							
							
								Kit 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							4b42267ba3 
							
						 
					 
					
						
						
							
							Fix issue  #1889  
						
						
						
					 
					
						2018-01-25 23:17:22 +01:00 
						 
				 
			
				
					
						
							
							
								Ole Henrik Skogstrøm 
							
						 
					 
					
						
						
						
						
							
						
						
							8e2c9f2475 
							
						 
					 
					
						
						
							
							Cleaned up nb tag_map comments  
						
						
						
					 
					
						2018-01-25 11:09:28 +01:00 
						 
				 
			
				
					
						
							
							
								Ole Henrik Skogstrøm 
							
						 
					 
					
						
						
						
						
							
						
						
							1107e89fcf 
							
						 
					 
					
						
						
							
							Updated doc string on nb tag_map module  
						
						
						
					 
					
						2018-01-25 11:08:28 +01:00 
						 
				 
			
				
					
						
							
							
								Ole Henrik Skogstrøm 
							
						 
					 
					
						
						
						
						
							
						
						
							4058a7d579 
							
						 
					 
					
						
						
							
							Fix æøå characters in lemmatizer  
						
						
						
					 
					
						2018-01-24 14:03:14 +01:00 
						 
				 
			
				
					
						
							
							
								Ole Henrik Skogstrøm 
							
						 
					 
					
						
						
						
						
							
						
						
							42248f423f 
							
						 
					 
					
						
						
							
							Updated tag map  
						
						
						
					 
					
						2018-01-24 13:50:33 +01:00 
						 
				 
			
				
					
						
							
							
								Ole Henrik Skogstrøm 
							
						 
					 
					
						
						
						
						
							
						
						
							74b430b49a 
							
						 
					 
					
						
						
							
							Correct Lemmatizer  
						
						
						
					 
					
						2018-01-24 13:26:33 +01:00 
						 
				 
			
				
					
						
							
							
								Ole Henrik Skogstrøm 
							
						 
					 
					
						
						
						
						
							
						
						
							b9b3a40c78 
							
						 
					 
					
						
						
							
							Add norwegian lemmatizer and tag_map  
						
						
						
					 
					
						2018-01-24 12:28:29 +01:00 
						 
				 
			
				
					
						
							
							
								Ali Zarezade 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							42349471bc 
							
						 
					 
					
						
						
							
							add ٪ as punctuation  
						
						
						
					 
					
						2018-01-23 18:11:33 +03:30 
						 
				 
			
				
					
						
							
							
								Ali Zarezade 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							2bda582135 
							
						 
					 
					
						
						
							
							Add Persian character and symbols  
						
						... 
						
						
						
						Add Persian characters and the following:
- ٪ used instead of %
- ؟ used instead of ?
- ﷼ used instead of $
- ، used instead of ,
- ؛ used instead of ; 
						
					 
					
						2018-01-23 13:20:36 +03:30 
						 
				 
			
				
					
						
							
							
								Kit 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							701e7cc6aa 
							
						 
					 
					
						
						
							
							Rename variable to keep code consistent  
						
						
						
					 
					
						2018-01-08 03:38:44 +01:00 
						 
				 
			
				
					
						
							
							
								Kit 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							ed0db95183 
							
						 
					 
					
						
						
							
							Find lowercased forms of ordinal words, where possible  
						
						
						
					 
					
						2018-01-08 03:28:50 +01:00 
						 
				 
			
				
					
						
							
							
								Kit 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							9bc524982e 
							
						 
					 
					
						
						
							
							Find lowercased forms of numeric words  
						
						
						
					 
					
						2018-01-08 03:25:08 +01:00 
						 
				 
			
				
					
						
							
							
								Kevin Humphreys 
							
						 
					 
					
						
						
						
						
							
						
						
							7918fa4ef9 
							
						 
					 
					
						
						
							
							handle would've  
						
						
						
					 
					
						2018-01-03 12:25:48 -08:00 
						 
				 
			
				
					
						
							
							
								zqhZY 
							
						 
					 
					
						
						
						
						
							
						
						
							f27859fa99 
							
						 
					 
					
						
						
							
							add ChineseDefaults class for pickling  
						
						
						
					 
					
						2017-12-28 17:13:58 +08:00 
						 
				 
			
				
					
						
							
							
								Søren Lind Kristiansen 
							
						 
					 
					
						
						
						
						
							
						
						
							bef735aef7 
							
						 
					 
					
						
						
							
							Fix Danish abbreviation 'm.h.t.'  
						
						
						
					 
					
						2017-12-21 09:24:31 +01:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							a3dd167d7f 
							
						 
					 
					
						
						
							
							Merge branch 'master' into da_ud_tokenization  
						
						
						
					 
					
						2017-12-20 21:05:34 +00:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							97f100f69f 
							
						 
					 
					
						
						
							
							Merge pull request  #1742  from kimfalk/master  
						
						... 
						
						
						
						Two corrections in the da lan. 
						
					 
					
						2017-12-20 21:02:00 +00:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							d682a8803e 
							
						 
					 
					
						
						
							
							Merge pull request  #1672  from cbilgili/master  
						
						... 
						
						
						
						Adds Turkish Lemmatization 
						
					 
					
						2017-12-20 21:01:00 +00:00 
						 
				 
			
				
					
						
							
							
								Benjamin Peterson 
							
						 
					 
					
						
						
						
						
							
						
						
							9452134cd1 
							
						 
					 
					
						
						
							
							remove no-break spaces from Hindi example ( fixes   #1750 )  
						
						
						
					 
					
						2017-12-20 11:35:30 -08:00 
						 
				 
			
				
					
						
							
							
								Søren Lind Kristiansen 
							
						 
					 
					
						
						
						
						
							
						
						
							7a2f2f6f94 
							
						 
					 
					
						
						
							
							Fix formatting.  
						
						
						
					 
					
						2017-12-20 18:37:37 +01:00 
						 
				 
			
				
					
						
							
							
								Søren Lind Kristiansen 
							
						 
					 
					
						
						
						
						
							
						
						
							15d13efafd 
							
						 
					 
					
						
						
							
							Tune Danish tokenizer to more closely match tokenization in Universal Dependencies.  
						
						
						
					 
					
						2017-12-20 17:36:52 +01:00 
						 
				 
			
				
					
						
							
							
								Kim FalkJørgensen 
							
						 
					 
					
						
						
						
						
							
						
						
							648dc60755 
							
						 
					 
					
						
						
							
							Remove the incorrect exception 'm.h.t'  
						
						
						
					 
					
						2017-12-20 10:02:39 +01:00 
						 
				 
			
				
					
						
							
							
								Kim FalkJørgensen 
							
						 
					 
					
						
						
						
						
							
						
						
							9c9f4ef84a 
							
						 
					 
					
						
						
							
							Fixing a translation error in examples.py  
						
						... 
						
						
						
						Adding an exception in the tokenizer_exceptions.py 
						
					 
					
						2017-12-19 15:26:50 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							22dc744b48 
							
						 
					 
					
						
						
							
							Fix check for '@' in like_url (see  #1715 )  
						
						
						
					 
					
						2017-12-16 13:48:43 +01:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							6455b574fc 
							
						 
					 
					
						
						
							
							Check for email address first  
						
						
						
					 
					
						2017-12-12 10:25:13 +01:00 
						 
				 
			
				
					
						
							
							
								Bri-Will 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							d77361d76c 
							
						 
					 
					
						
						
							
							Update lex_attrs.py. Fix like_url from matching on e-mail  
						
						
						
					 
					
						2017-12-11 14:13:28 -08:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							2ab0f2d186 
							
						 
					 
					
						
						
							
							Merge pull request  #1664  from jimregan/italian-lemmatizer  
						
						... 
						
						
						
						BOM in Italian lemmatiser 
						
					 
					
						2017-12-06 11:09:04 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							3f247119d3 
							
						 
					 
					
						
						
							
							Merge pull request  #1668  from sorenlind/da_morph  
						
						... 
						
						
						
						Add more Danish morph rules and clean up existing ones 
						
					 
					
						2017-12-06 11:08:09 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							f2ea6d4713 
							
						 
					 
					
						
						
							
							Add Dutch example sentences (see  #1107 )  
						
						
						
					 
					
						2017-12-01 23:36:05 +01:00 
						 
				 
			
				
					
						
							
							
								Canbey Bilgili 
							
						 
					 
					
						
						
						
						
							
						
						
							abe098b255 
							
						 
					 
					
						
						
							
							Adds Turkish Lemmatization  
						
						
						
					 
					
						2017-12-01 17:04:32 +03:00 
						 
				 
			
				
					
						
							
							
								Søren Lind Kristiansen 
							
						 
					 
					
						
						
						
						
							
						
						
							d86b537a38 
							
						 
					 
					
						
						
							
							Enable morph rules for Danish  
						
						
						
					 
					
						2017-11-30 15:58:02 +01:00 
						 
				 
			
				
					
						
							
							
								Søren Lind Kristiansen 
							
						 
					 
					
						
						
						
						
							
						
						
							13a988adc3 
							
						 
					 
					
						
						
							
							Remove 'Number[psor]'  
						
						
						
					 
					
						2017-11-30 15:55:04 +01:00 
						 
				 
			
				
					
						
							
							
								Søren Lind Kristiansen 
							
						 
					 
					
						
						
						
						
							
						
						
							dd6fde18a9 
							
						 
					 
					
						
						
							
							Add more Danish morph rules and clean up existing ones  
						
						
						
					 
					
						2017-11-30 11:17:19 +01:00 
						 
				 
			
				
					
						
							
							
								Vadim Mazaev 
							
						 
					 
					
						
						
						
						
							
						
						
							4ba7ddf651 
							
						 
					 
					
						
						
							
							Bugfixies  
						
						
						
					 
					
						2017-11-30 12:29:38 +03:00 
						 
				 
			
				
					
						
							
							
								Jim O'Regan 
							
						 
					 
					
						
						
						
						
							
						
						
							c3e6cee17a 
							
						 
					 
					
						
						
							
							use inan in polimorf tagset conversion  
						
						
						
					 
					
						2017-11-29 23:15:47 +00:00 
						 
				 
			
				
					
						
							
							
								Jim O'Regan 
							
						 
					 
					
						
						
						
						
							
						
						
							b32575e78c 
							
						 
					 
					
						
						
							
							imports  
						
						
						
					 
					
						2017-11-29 23:03:41 +00:00 
						 
				 
			
				
					
						
							
							
								Jim O'Regan 
							
						 
					 
					
						
						
						
						
							
						
						
							3696ce6a7b 
							
						 
					 
					
						
						
							
							add UD mapping  
						
						
						
					 
					
						2017-11-29 22:59:19 +00:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							f9ed9ea529 
							
						 
					 
					
						
						
							
							Merge pull request  #1624  from GreenRiverRUS/russian  
						
						... 
						
						
						
						Add support for Russian 
						
					 
					
						2017-11-29 23:10:01 +01:00 
						 
				 
			
				
					
						
							
							
								Jim O'Regan 
							
						 
					 
					
						
						
						
						
							
						
						
							076a6fc60a 
							
						 
					 
					
						
						
							
							symbols  
						
						
						
					 
					
						2017-11-29 20:11:20 +00:00 
						 
				 
			
				
					
						
							
							
								Jim O'Regan 
							
						 
					 
					
						
						
						
						
							
						
						
							834ba3c69a 
							
						 
					 
					
						
						
							
							(semi generated) Polimorf mapping  
						
						
						
					 
					
						2017-11-29 20:08:24 +00:00 
						 
				 
			
				
					
						
							
							
								Jim O'Regan 
							
						 
					 
					
						
						
						
						
							
						
						
							ba6a23fd11 
							
						 
					 
					
						
						
							
							BOM in Italian lemmatiser  
						
						
						
					 
					
						2017-11-29 17:40:07 +00:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							9052643e2c 
							
						 
					 
					
						
						
							
							Merge pull request  #1653  from sorenlind/da_example_typo  
						
						... 
						
						
						
						Fix typo 
						
					 
					
						2017-11-27 14:47:42 +00:00 
						 
				 
			
				
					
						
							
							
								Søren Lind Kristiansen 
							
						 
					 
					
						
						
						
						
							
						
						
							5fe58b885b 
							
						 
					 
					
						
						
							
							Fix typo  
						
						
						
					 
					
						2017-11-27 15:36:18 +01:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							d52b1ab245 
							
						 
					 
					
						
						
							
							Add unicode_literals (hopefully fixes test failure on Python 2)  
						
						
						
					 
					
						2017-11-27 15:16:54 +01:00 
						 
				 
			
				
					
						
							
							
								Søren Lind Kristiansen 
							
						 
					 
					
						
						
						
						
							
						
						
							0ffd27b0f6 
							
						 
					 
					
						
						
							
							Add several Danish alternative spellings  
						
						
						
					 
					
						2017-11-27 13:35:41 +01:00 
						 
				 
			
				
					
						
							
							
								Vadim Mazaev 
							
						 
					 
					
						
						
						
						
							
						
						
							cacd859dcd 
							
						 
					 
					
						
						
							
							Added tag map, fixed tests fails, added more exceptions  
						
						
						
					 
					
						2017-11-26 20:54:48 +03:00 
						 
				 
			
				
					
						
							
							
								Søren Lind Kristiansen 
							
						 
					 
					
						
						
						
						
							
						
						
							ef03e9ea53 
							
						 
					 
					
						
						
							
							Remove unused import.  
						
						
						
					 
					
						2017-11-25 13:04:02 +01:00 
						 
				 
			
				
					
						
							
							
								Søren Lind Kristiansen 
							
						 
					 
					
						
						
						
						
							
						
						
							6aa241bcec 
							
						 
					 
					
						
						
							
							Add day of month tokenizer exceptions for Danish.  
						
						
						
					 
					
						2017-11-24 15:03:24 +01:00 
						 
				 
			
				
					
						
							
							
								Søren Lind Kristiansen 
							
						 
					 
					
						
						
						
						
							
						
						
							0c276ed020 
							
						 
					 
					
						
						
							
							Add weekday abbreviations and remove abiguous month abbreviations for Danish.  
						
						
						
					 
					
						2017-11-24 14:43:29 +01:00 
						 
				 
			
				
					
						
							
							
								Søren Lind Kristiansen 
							
						 
					 
					
						
						
						
						
							
						
						
							056547e989 
							
						 
					 
					
						
						
							
							Add multiple tokenizer exceptions for Danish.  
						
						
						
					 
					
						2017-11-24 11:51:26 +01:00 
						 
				 
			
				
					
						
							
							
								Søren Lind Kristiansen 
							
						 
					 
					
						
						
						
						
							
						
						
							ac8116510d 
							
						 
					 
					
						
						
							
							Fix tokenization of 'i.' for Danish.  
						
						
						
					 
					
						2017-11-24 11:16:53 +01:00 
						 
				 
			
				
					
						
							
							
								Vadim Mazaev 
							
						 
					 
					
						
						
						
						
							
						
						
							81314f8659 
							
						 
					 
					
						
						
							
							Fixed tokenizer: added char classes; added first lemmatizer and  
						
						... 
						
						
						
						tokenizer tests 
						
					 
					
						2017-11-21 22:23:59 +03:00 
						 
				 
			
				
					
						
							
							
								Vadim Mazaev 
							
						 
					 
					
						
						
						
						
							
						
						
							52ee1f9bf9 
							
						 
					 
					
						
						
							
							Updated Russian Language, added lemmatizer, norm exceptions and lex  
						
						... 
						
						
						
						attrs 
						
					 
					
						2017-11-21 11:44:46 +03:00 
						 
				 
			
				
					
						
							
							
								Vadim Mazaev 
							
						 
					 
					
						
						
						
						
							
						
						
							a0739a06d4 
							
						 
					 
					
						
						
							
							Returned russian support from v1.10 branch  
						
						
						
					 
					
						2017-11-17 17:06:15 +03:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							c9d72de0fb 
							
						 
					 
					
						
						
							
							Add dummy serialization methods for Japanese and missing lang getter ( resolves   #1557 )  
						
						
						
					 
					
						2017-11-15 12:44:02 +01:00 
						 
				 
			
				
					
						
							
							
								Mathias Deschamps 
							
						 
					 
					
						
						
						
						
							
						
						
							c0691b2ab4 
							
						 
					 
					
						
						
							
							Add tokenizer exceptions for ing verbs  
						
						... 
						
						
						
						Extend list of tokenizing exceptions introduced in 123810b 
						
					 
					
						2017-11-13 17:46:05 +01:00 
						 
				 
			
				
					
						
							
							
								Mathias Deschamps 
							
						 
					 
					
						
						
						
						
							
						
						
							288298ead9 
							
						 
					 
					
						
						
							
							Add norm exception for ing verbs  
						
						... 
						
						
						
						Some ing verbs are sometimes written in or in'. Make the NORM form correct 
						
					 
					
						2017-11-13 17:46:05 +01:00 
						 
				 
			
				
					
						
							
							
								Abhinav Sharma 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							59f5740ede 
							
						 
					 
					
						
						
							
							improved upon the list of included stop_words  
						
						
						
					 
					
						2017-11-13 17:13:49 +05:30 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							123810b6de 
							
						 
					 
					
						
						
							
							Add "lovin'" to tokenizer exceptions (see  #1248 )  
						
						
						
					 
					
						2017-11-09 17:09:30 +01:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							42b241ccd0 
							
						 
					 
					
						
						
							
							Update language code in usage example in comment  
						
						
						
					 
					
						2017-11-08 11:36:38 +01:00 
						 
				 
			
				
					
						
							
							
								Abhinav Sharma 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							84edade82d 
							
						 
					 
					
						
						
							
							Create examples.py  
						
						... 
						
						
						
						Populated the file with the translations of English example sentences 
						
					 
					
						2017-11-08 13:23:08 +05:30 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							bcf42b8846 
							
						 
					 
					
						
						
							
							Fix typo  
						
						
						
					 
					
						2017-11-08 01:06:37 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							acb9bdb852 
							
						 
					 
					
						
						
							
							Fix PRON_LEMMA imports  
						
						
						
					 
					
						2017-11-06 17:41:53 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							baa231745c 
							
						 
					 
					
						
						
							
							Fix Dutch tag map  
						
						
						
					 
					
						2017-11-05 21:41:50 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							507ecb67af 
							
						 
					 
					
						
						
							
							Fix Spanish tag map  
						
						
						
					 
					
						2017-11-05 19:23:34 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							975e1042ff 
							
						 
					 
					
						
						
							
							Fix Italian tag map  
						
						
						
					 
					
						2017-11-05 18:34:09 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							6b2d6e4937 
							
						 
					 
					
						
						
							
							Fix Portuguese tag map  
						
						
						
					 
					
						2017-11-05 18:31:00 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							fa2687fded 
							
						 
					 
					
						
						
							
							Fix Dutch tag map  
						
						
						
					 
					
						2017-11-05 17:57:59 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							fb8990d916 
							
						 
					 
					
						
						
							
							Fix Spanish tag map  
						
						
						
					 
					
						2017-11-05 17:48:46 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							9d13288f73 
							
						 
					 
					
						
						
							
							Fix French tag map  
						
						
						
					 
					
						2017-11-05 17:47:59 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							54579805c5 
							
						 
					 
					
						
						
							
							Fix French tag map  
						
						
						
					 
					
						2017-11-05 17:44:05 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							0d4bd6414e 
							
						 
					 
					
						
						
							
							Fix Italian tag map  
						
						
						
					 
					
						2017-11-05 14:11:03 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							ef597622a6 
							
						 
					 
					
						
						
							
							Add Portuguese tag map  
						
						
						
					 
					
						2017-11-05 13:58:34 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							793c62dfda 
							
						 
					 
					
						
						
							
							Add Dutch tag map  
						
						
						
					 
					
						2017-11-05 13:48:07 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							f7485a09c8 
							
						 
					 
					
						
						
							
							Fix Italian tag map  
						
						
						
					 
					
						2017-11-05 13:12:58 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							3cef901834 
							
						 
					 
					
						
						
							
							Add tag map for French and Italian  
						
						
						
					 
					
						2017-11-04 23:32:51 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							6c15aafebd 
							
						 
					 
					
						
						
							
							Fix formatting  
						
						
						
					 
					
						2017-11-04 23:07:02 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							9baab241b4 
							
						 
					 
					
						
						
							
							Add skeleton language data for Turkish  
						
						
						
					 
					
						2017-11-02 16:32:24 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							c6fea3e5f6 
							
						 
					 
					
						
						
							
							Add Romanian and Croatian skeletons (experimental)  
						
						... 
						
						
						
						Add language data templates to make it easier for others to contribute to the language support 
						
					 
					
						2017-11-01 23:04:28 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							18c859500b 
							
						 
					 
					
						
						
							
							Add missing imports  
						
						
						
					 
					
						2017-11-01 23:02:51 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							819e30a26e 
							
						 
					 
					
						
						
							
							Tidy up tokenizer exceptions  
						
						
						
					 
					
						2017-11-01 23:02:45 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							9659391944 
							
						 
					 
					
						
						
							
							Update deprecated methods and add warnings  
						
						
						
					 
					
						2017-11-01 16:49:42 +01:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							d11659463b 
							
						 
					 
					
						
						
							
							Merge pull request  #1152  from jimregan/develop-irish  
						
						... 
						
						
						
						[WIP] attempt a port from #1147  
						
					 
					
						2017-11-01 00:23:43 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							7e424a1804 
							
						 
					 
					
						
						
							
							Don't copy exception dicts if not necessary and tidy up  
						
						
						
					 
					
						2017-10-31 21:05:29 +01:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							06c25a8882 
							
						 
					 
					
						
						
							
							Remove comma that caused list to wrap in tuple!  
						
						... 
						
						
						
						Also removed extra dict wrappings for performance (we used to have them in there, but they should only really exist if copying the dict is absolutely necessary) 
						
					 
					
						2017-10-31 20:13:16 +01:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							147448b65b 
							
						 
					 
					
						
						
							
							Add missing symbols  
						
						
						
					 
					
						2017-10-31 19:34:45 +01:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							9b0de9fb43 
							
						 
					 
					
						
						
							
							Fix import of symbols (now nested one level lower)  
						
						
						
					 
					
						2017-10-31 19:17:58 +01:00 
						 
				 
			
				
					
						
							
							
								Jim O'Regan 
							
						 
					 
					
						
						
						
						
							
						
						
							41dd29e48e 
							
						 
					 
					
						
						
							
							merge  
						
						
						
					 
					
						2017-10-31 14:07:45 +00:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							090bd00369 
							
						 
					 
					
						
						
							
							Merge pull request  #1464  from mayukh18/develop_bengali_pronouns  
						
						... 
						
						
						
						added the bengali pronouns for v2.0 
						
					 
					
						2017-10-25 21:55:25 +02:00 
						 
				 
			
				
					
						
							
							
								mayukh18 
							
						 
					 
					
						
						
						
						
							
						
						
							1bc07758fa 
							
						 
					 
					
						
						
							
							added few bengali pronouns  
						
						
						
					 
					
						2017-10-25 22:24:40 +05:30 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							d3bf488e16 
							
						 
					 
					
						
						
							
							Merge pull request  #1171  from mollerhoj/support-danish  
						
						... 
						
						
						
						Improve basic support for Danish 
						
					 
					
						2017-10-24 20:29:57 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							66766c1454 
							
						 
					 
					
						
						
							
							Restore SP tag to English tag_map, until models migrate  
						
						
						
					 
					
						2017-10-24 17:05:00 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							c55db0a4a1 
							
						 
					 
					
						
						
							
							Add example sentences for Japanese and Chinese (see  #1107 )  
						
						
						
					 
					
						2017-10-24 13:02:24 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							66f8f9d4a0 
							
						 
					 
					
						
						
							
							Fix Japanese tokenizer  
						
						... 
						
						
						
						JapaneseTokenizer now returns a Doc, not individual words 
						
					 
					
						2017-10-24 13:02:19 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							facf77e541 
							
						 
					 
					
						
						
							
							Merge branch 'develop' into support-danish  
						
						
						
					 
					
						2017-10-24 11:53:19 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							49895fbef6 
							
						 
					 
					
						
						
							
							Rename 'SP' special tag to '_SP'  
						
						... 
						
						
						
						Renaming the tag with an underscore lets us add it to the tag map
without worrying that we'll change the sequence of tags, which throws
off the tag-to-ID mapping. For instance, if we inserted a 'SP' tag,
the "VERB" tag is pushed to a different class ID, and the model is all
messed up. 
						
					 
					
						2017-10-20 14:01:12 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							f0d577e460 
							
						 
					 
					
						
						
							
							Merge pull request  #1425  from explosion/feature/hindi-tokenizer  
						
						... 
						
						
						
						💫  Basic Hindi tokenization support 
					
						2017-10-18 13:34:52 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							839de87ca9 
							
						 
					 
					
						
						
							
							Make lambda func a named function, for pickling  
						
						
						
					 
					
						2017-10-17 18:21:20 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9ce7d6af87 
							
						 
					 
					
						
						
							
							Make lex attr functions top-level functions, to promote pickling  
						
						
						
					 
					
						2017-10-17 18:19:18 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							aab299c8ae 
							
						 
					 
					
						
						
							
							Merge pull request  #1429  from vishnunekkanti/develop  
						
						... 
						
						
						
						fix syntax error in zh 
						
					 
					
						2017-10-17 14:45:02 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							485c4f6df5 
							
						 
					 
					
						
						
							
							Add Hungarian examples (see  #1107 )  
						
						
						
					 
					
						2017-10-17 02:37:45 +02:00 
						 
				 
			
				
					
						
							
							
								Vishnu Kumar Nekkanti 
							
						 
					 
					
						
						
						
						
							
						
						
							d3c54cf39a 
							
						 
					 
					
						
						
							
							fixed SyntaxError while checking for jieba  
						
						
						
					 
					
						2017-10-16 18:51:33 +05:30 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							266e7180a7 
							
						 
					 
					
						
						
							
							Add Language class, stop words and basic stemmer that sets NORM  
						
						
						
					 
					
						2017-10-14 14:59:52 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							e85e1d571b 
							
						 
					 
					
						
						
							
							Update base punctuation  
						
						
						
					 
					
						2017-10-14 14:59:23 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							9d6c8eaa49 
							
						 
					 
					
						
						
							
							Update base norm exceptions with more unicode characters  
						
						... 
						
						
						
						e.g. unicode variations of punctuation used in Chinese 
						
					 
					
						2017-10-14 14:58:52 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							38c756fd85 
							
						 
					 
					
						
						
							
							Port over changes from  #1287  
						
						
						
					 
					
						2017-10-14 13:16:21 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							612224c10d 
							
						 
					 
					
						
						
							
							Port over changes from  #1157  
						
						
						
					 
					
						2017-10-14 13:11:39 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							a4d974d97b 
							
						 
					 
					
						
						
							
							Port over URL pattern changes from  #1411  
						
						
						
					 
					
						2017-10-14 12:58:07 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							09aed58140 
							
						 
					 
					
						
						
							
							Port over changes from  #1333  and add comments  
						
						
						
					 
					
						2017-10-14 12:52:59 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							8ce6f96180 
							
						 
					 
					
						
						
							
							Don't make copies of language data components  
						
						
						
					 
					
						2017-10-11 15:34:55 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							417d45f5d0 
							
						 
					 
					
						
						
							
							Add lemmatizer data as variable on language data  
						
						... 
						
						
						
						Don't create lookup lemmatizer within Language class and just pass in
the data so it can be set on Token creation 
						
					 
					
						2017-10-11 02:24:58 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							0c2343d73a 
							
						 
					 
					
						
						
							
							Tidy up language data  
						
						
						
					 
					
						2017-10-11 02:22:49 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8143618497 
							
						 
					 
					
						
						
							
							Set prefix length back to 1  
						
						
						
					 
					
						2017-10-10 19:32:54 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							dce8afb9cf 
							
						 
					 
					
						
						
							
							Set prefix length to 3  
						
						
						
					 
					
						2017-10-09 21:55:55 -05:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							959c46eabe 
							
						 
					 
					
						
						
							
							Merge pull request  #1365  from wannaphongcom/develop  
						
						... 
						
						
						
						Add Thai language for spaCy v2 
						
					 
					
						2017-09-26 23:43:05 +02:00 
						 
				 
			
				
					
						
							
							
								Wannaphong Phatthiyaphaibun 
							
						 
					 
					
						
						
						
						
							
						
						
							3d5046c499 
							
						 
					 
					
						
						
							
							fix import in th  
						
						
						
					 
					
						2017-09-26 22:41:20 +07:00 
						 
				 
			
				
					
						
							
							
								Wannaphong Phatthiyaphaibun 
							
						 
					 
					
						
						
						
						
							
						
						
							a63f790b8c 
							
						 
					 
					
						
						
							
							fix thai tag_map  
						
						
						
					 
					
						2017-09-26 22:28:57 +07:00 
						 
				 
			
				
					
						
							
							
								Wannaphong Phatthiyaphaibun 
							
						 
					 
					
						
						
						
						
							
						
						
							2ea27d07f4 
							
						 
					 
					
						
						
							
							fix tokenizer_exceptions in thai  
						
						
						
					 
					
						2017-09-26 22:14:47 +07:00 
						 
				 
			
				
					
						
							
							
								Wannaphong Phatthiyaphaibun 
							
						 
					 
					
						
						
						
						
							
						
						
							a2bf4cc7bf 
							
						 
					 
					
						
						
							
							fix newline in file  
						
						
						
					 
					
						2017-09-26 21:49:43 +07:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							bb5c631402 
							
						 
					 
					
						
						
							
							Implement like_num getter for French (via  #1161 )  
						
						
						
					 
					
						2017-09-26 16:47:45 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							15479b3bae 
							
						 
					 
					
						
						
							
							Add comment to like_num re: future work  
						
						
						
					 
					
						2017-09-26 16:43:28 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							adda08fe14 
							
						 
					 
					
						
						
							
							Implement like_num getter for Dutch (via  #1177 )  
						
						
						
					 
					
						2017-09-26 16:39:15 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							5ee10379db 
							
						 
					 
					
						
						
							
							Port over changes from  #1340  
						
						
						
					 
					
						2017-09-26 16:38:08 +02:00 
						 
				 
			
				
					
						
							
							
								Wannaphong Phatthiyaphaibun 
							
						 
					 
					
						
						
						
						
							
						
						
							5cba67146c 
							
						 
					 
					
						
						
							
							add thai in spacy2  
						
						
						
					 
					
						2017-09-26 21:36:27 +07:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							10d291f129 
							
						 
					 
					
						
						
							
							Port over change from  #1351  
						
						
						
					 
					
						2017-09-26 16:11:41 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							ece30c28a8 
							
						 
					 
					
						
						
							
							Don't split hyphenated words in German  
						
						... 
						
						
						
						This way, the tokenizer matches the tokenization in German treebanks 
						
					 
					
						2017-09-16 20:40:15 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							bd3da3d6fb 
							
						 
					 
					
						
						
							
							Port over change from  #1323  and tidy up  
						
						
						
					 
					
						2017-09-14 19:23:13 +02:00 
						 
				 
			
				
					
						
							
							
								Jim O'Regan 
							
						 
					 
					
						
						
						
						
							
						
						
							9dfd301962 
							
						 
					 
					
						
						
							
							rearrange  
						
						
						
					 
					
						2017-09-11 10:14:18 +01:00 
						 
				 
			
				
					
						
							
							
								Jim O'Regan 
							
						 
					 
					
						
						
						
						
							
						
						
							1ee75ae337 
							
						 
					 
					
						
						
							
							Merge remote-tracking branch 'origin/develop' into develop-irish  
						
						
						
					 
					
						2017-09-11 08:40:11 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b29e6bff46 
							
						 
					 
					
						
						
							
							Improve lemmatization rule for am|VBP  
						
						
						
					 
					
						2017-09-04 15:18:10 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2e28982e28 
							
						 
					 
					
						
						
							
							Merge pull request  #1288  from geovedi/indonesian  
						
						... 
						
						
						
						Indonesian language support 
						
					 
					
						2017-08-26 21:31:13 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							cfc055734e 
							
						 
					 
					
						
						
							
							Split % in units, for compatibility with corpus  
						
						
						
					 
					
						2017-08-25 20:03:37 -05:00 
						 
				 
			
				
					
						
							
							
								Jim Geovedi 
							
						 
					 
					
						
						
						
						
							
						
						
							58d8078971 
							
						 
					 
					
						
						
							
							Merge remote-tracking branch 'upstream/develop' into indonesian  
						
						
						
					 
					
						2017-08-25 09:21:49 +08:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							bb2541ffd3 
							
						 
					 
					
						
						
							
							Fix PROB attr for OOV words  
						
						
						
					 
					
						2017-08-23 12:11:52 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							a68dc891ea 
							
						 
					 
					
						
						
							
							Port over changes from   #1281  
						
						
						
					 
					
						2017-08-21 23:19:18 +02:00 
						 
				 
			
				
					
						
							
							
								Jim Geovedi 
							
						 
					 
					
						
						
						
						
							
						
						
							f77443ab68 
							
						 
					 
					
						
						
							
							reworked  
						
						
						
					 
					
						2017-08-20 13:43:21 +07:00 
						 
				 
			
				
					
						
							
							
								Jim Geovedi 
							
						 
					 
					
						
						
						
						
							
						
						
							b7d83f37c8 
							
						 
					 
					
						
						
							
							indonesian abbr.  
						
						
						
					 
					
						2017-08-20 12:16:50 +07:00 
						 
				 
			
				
					
						
							
							
								Jim Geovedi 
							
						 
					 
					
						
						
						
						
							
						
						
							7193c47f0b 
							
						 
					 
					
						
						
							
							direct lookup  
						
						
						
					 
					
						2017-08-20 11:57:52 +07:00 
						 
				 
			
				
					
						
							
							
								Jim Geovedi 
							
						 
					 
					
						
						
						
						
							
						
						
							fdf802d505 
							
						 
					 
					
						
						
							
							added examples  
						
						
						
					 
					
						2017-08-20 11:57:10 +07:00 
						 
				 
			
				
					
						
							
							
								Jim Geovedi 
							
						 
					 
					
						
						
						
						
							
						
						
							fa544e6c9a 
							
						 
					 
					
						
						
							
							Merge remote-tracking branch 'upstream/develop' into indonesian  
						
						
						
					 
					
						2017-08-20 11:49:40 +07:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							1fe5e1a4d1 
							
						 
					 
					
						
						
							
							Add language example sentences (see  #1107 )  
						
						... 
						
						
						
						da, de, en, es, fr, he, it, nb, pl, pt, sv 
						
					 
					
						2017-08-19 12:22:29 +02:00 
						 
				 
			
				
					
						
							
							
								Jim O'Regan 
							
						 
					 
					
						
						
						
						
							
						
						
							c069b4acb5 
							
						 
					 
					
						
						
							
							fix in UD submitted; map either way  
						
						
						
					 
					
						2017-08-08 19:22:14 +01:00 
						 
				 
			
				
					
						
							
							
								Jim O'Regan 
							
						 
					 
					
						
						
						
						
							
						
						
							76c22dec4d 
							
						 
					 
					
						
						
							
							UD Irish tag mapping  
						
						
						
					 
					
						2017-08-08 19:04:52 +01:00 
						 
				 
			
				
					
						
							
							
								Jim O'Regan 
							
						 
					 
					
						
						
						
						
							
						
						
							95921d7d4c 
							
						 
					 
					
						
						
							
							Merge branch 'develop' into develop-irish  
						
						
						
					 
					
						2017-08-08 17:21:27 +01:00 
						 
				 
			
				
					
						
							
							
								Jim Geovedi 
							
						 
					 
					
						
						
						
						
							
						
						
							37f19f5ed2 
							
						 
					 
					
						
						
							
							added more currencies based on corpus data  
						
						
						
					 
					
						2017-08-03 13:03:25 +07:00 
						 
				 
			
				
					
						
							
							
								Jim Geovedi 
							
						 
					 
					
						
						
						
						
							
						
						
							30fd068d42 
							
						 
					 
					
						
						
							
							hashtag prefix should be handled somewhere else  
						
						
						
					 
					
						2017-08-03 13:03:02 +07:00 
						 
				 
			
				
					
						
							
							
								Jim Geovedi 
							
						 
					 
					
						
						
						
						
							
						
						
							ba07e23c87 
							
						 
					 
					
						
						
							
							added USD in currency rules  
						
						
						
					 
					
						2017-08-02 22:42:47 +07:00 
						 
				 
			
				
					
						
							
							
								Jim Geovedi 
							
						 
					 
					
						
						
						
						
							
						
						
							bb08d696f9 
							
						 
					 
					
						
						
							
							added hashtag rule and fixed currency rules  
						
						
						
					 
					
						2017-07-30 21:23:28 +07:00 
						 
				 
			
				
					
						
							
							
								Jim Geovedi 
							
						 
					 
					
						
						
						
						
							
						
						
							e9af79a803 
							
						 
					 
					
						
						
							
							added u-\d+ rules (sports team)  
						
						
						
					 
					
						2017-07-30 21:23:01 +07:00 
						 
				 
			
				
					
						
							
							
								Jim Geovedi 
							
						 
					 
					
						
						
						
						
							
						
						
							e5adc26c72 
							
						 
					 
					
						
						
							
							simplified rules  
						
						
						
					 
					
						2017-07-29 18:21:32 +07:00 
						 
				 
			
				
					
						
							
							
								Jim Geovedi 
							
						 
					 
					
						
						
						
						
							
						
						
							4d04898dea 
							
						 
					 
					
						
						
							
							updated regexp  
						
						
						
					 
					
						2017-07-29 17:44:57 +07:00 
						 
				 
			
				
					
						
							
							
								Jim Geovedi 
							
						 
					 
					
						
						
						
						
							
						
						
							7d96d477ea 
							
						 
					 
					
						
						
							
							updated like_num  
						
						
						
					 
					
						2017-07-29 17:44:46 +07:00 
						 
				 
			
				
					
						
							
							
								Jim Geovedi 
							
						 
					 
					
						
						
						
						
							
						
						
							3cca4ed798 
							
						 
					 
					
						
						
							
							added lex attrs rules  
						
						
						
					 
					
						2017-07-29 17:22:21 +07:00 
						 
				 
			
				
					
						
							
							
								Jim Geovedi 
							
						 
					 
					
						
						
						
						
							
						
						
							8b814c63f1 
							
						 
					 
					
						
						
							
							more exceptions  
						
						
						
					 
					
						2017-07-27 19:46:30 +07:00 
						 
				 
			
				
					
						
							
							
								Jim Geovedi 
							
						 
					 
					
						
						
						
						
							
						
						
							6c725e8dcf 
							
						 
					 
					
						
						
							
							updated lemma  
						
						
						
					 
					
						2017-07-27 19:46:21 +07:00 
						 
				 
			
				
					
						
							
							
								Jim Geovedi 
							
						 
					 
					
						
						
						
						
							
						
						
							547973b92a 
							
						 
					 
					
						
						
							
							wip syntax iterators  
						
						
						
					 
					
						2017-07-27 10:51:34 +07:00 
						 
				 
			
				
					
						
							
							
								Jim Geovedi 
							
						 
					 
					
						
						
						
						
							
						
						
							bbc75da38d 
							
						 
					 
					
						
						
							
							enable syntax iterator and lemma lookup  
						
						
						
					 
					
						2017-07-27 10:51:15 +07:00 
						 
				 
			
				
					
						
							
							
								Jim Geovedi 
							
						 
					 
					
						
						
						
						
							
						
						
							24a8c8bf28 
							
						 
					 
					
						
						
							
							added wip lemma dict  
						
						
						
					 
					
						2017-07-26 21:39:54 +07:00 
						 
				 
			
				
					
						
							
							
								Jim Geovedi 
							
						 
					 
					
						
						
						
						
							
						
						
							63f14ba46b 
							
						 
					 
					
						
						
							
							added hyphen-suffix rules  
						
						
						
					 
					
						2017-07-26 19:28:57 +07:00 
						 
				 
			
				
					
						
							
							
								Jim Geovedi 
							
						 
					 
					
						
						
						
						
							
						
						
							f288964441 
							
						 
					 
					
						
						
							
							removed -el from suffix rules  
						
						
						
					 
					
						2017-07-26 19:28:38 +07:00 
						 
				 
			
				
					
						
							
							
								Jim Geovedi 
							
						 
					 
					
						
						
						
						
							
						
						
							6eee7a7411 
							
						 
					 
					
						
						
							
							updated tokenizer exceptions  
						
						
						
					 
					
						2017-07-26 19:13:47 +07:00 
						 
				 
			
				
					
						
							
							
								Jim Geovedi 
							
						 
					 
					
						
						
						
						
							
						
						
							edec51b1b1 
							
						 
					 
					
						
						
							
							update punctuation rules  
						
						
						
					 
					
						2017-07-26 19:13:36 +07:00 
						 
				 
			
				
					
						
							
							
								Jim Geovedi 
							
						 
					 
					
						
						
						
						
							
						
						
							62443d495a 
							
						 
					 
					
						
						
							
							enable token match  
						
						
						
					 
					
						2017-07-26 19:13:14 +07:00 
						 
				 
			
				
					
						
							
							
								Jim Geovedi 
							
						 
					 
					
						
						
						
						
							
						
						
							c97f5ae0bb 
							
						 
					 
					
						
						
							
							updated tokenizer exceptions  
						
						
						
					 
					
						2017-07-26 19:12:52 +07:00 
						 
				 
			
				
					
						
							
							
								Jim Geovedi 
							
						 
					 
					
						
						
						
						
							
						
						
							73f6ac9d9b 
							
						 
					 
					
						
						
							
							added hyhen  
						
						
						
					 
					
						2017-07-24 15:56:31 +07:00 
						 
				 
			
				
					
						
							
							
								Jim Geovedi 
							
						 
					 
					
						
						
						
						
							
						
						
							68454c40bf 
							
						 
					 
					
						
						
							
							added missing import  
						
						
						
					 
					
						2017-07-24 14:12:34 +07:00 
						 
				 
			
				
					
						
							
							
								Jim Geovedi 
							
						 
					 
					
						
						
						
						
							
						
						
							eaf9cbd708 
							
						 
					 
					
						
						
							
							cursed of copy & paste  
						
						
						
					 
					
						2017-07-24 14:11:51 +07:00 
						 
				 
			
				
					
						
							
							
								Jim Geovedi 
							
						 
					 
					
						
						
						
						
							
						
						
							7aad6718bc 
							
						 
					 
					
						
						
							
							enable tokenizer exceptions  
						
						
						
					 
					
						2017-07-24 14:11:10 +07:00 
						 
				 
			
				
					
						
							
							
								Jim Geovedi 
							
						 
					 
					
						
						
						
						
							
						
						
							ad56c9179a 
							
						 
					 
					
						
						
							
							added tokenizer exceptions list  
						
						
						
					 
					
						2017-07-24 14:10:16 +07:00 
						 
				 
			
				
					
						
							
							
								Jim Geovedi 
							
						 
					 
					
						
						
						
						
							
						
						
							c1f3fe99fe 
							
						 
					 
					
						
						
							
							updated punctuation rules  
						
						
						
					 
					
						2017-07-24 13:57:21 +07:00 
						 
				 
			
				
					
						
							
							
								Jim Geovedi 
							
						 
					 
					
						
						
						
						
							
						
						
							37fa2c8c80 
							
						 
					 
					
						
						
							
							punctution rules  
						
						
						
					 
					
						2017-07-24 06:17:18 +07:00 
						 
				 
			
				
					
						
							
							
								Jim Geovedi 
							
						 
					 
					
						
						
						
						
							
						
						
							082e94ac1c 
							
						 
					 
					
						
						
							
							added inflix rules  
						
						
						
					 
					
						2017-07-24 06:17:07 +07:00 
						 
				 
			
				
					
						
							
							
								Jim Geovedi 
							
						 
					 
					
						
						
						
						
							
						
						
							d0ec484725 
							
						 
					 
					
						
						
							
							reverted  
						
						
						
					 
					
						2017-07-24 06:16:29 +07:00 
						 
				 
			
				
					
						
							
							
								Jim Geovedi 
							
						 
					 
					
						
						
						
						
							
						
						
							0e590c711f 
							
						 
					 
					
						
						
							
							added prefix & suffix rules  
						
						
						
					 
					
						2017-07-23 23:46:40 +07:00 
						 
				 
			
				
					
						
							
							
								Jim Geovedi 
							
						 
					 
					
						
						
						
						
							
						
						
							ba922e30e8 
							
						 
					 
					
						
						
							
							added ampere hour unit  
						
						
						
					 
					
						2017-07-23 23:46:18 +07:00 
						 
				 
			
				
					
						
							
							
								Jim Geovedi 
							
						 
					 
					
						
						
						
						
							
						
						
							3b17eba27b 
							
						 
					 
					
						
						
							
							added frequency units  
						
						
						
					 
					
						2017-07-23 23:10:52 +07:00 
						 
				 
			
				
					
						
							
							
								Jim Geovedi 
							
						 
					 
					
						
						
						
						
							
						
						
							d5fd32a572 
							
						 
					 
					
						
						
							
							added known currencies  
						
						
						
					 
					
						2017-07-23 22:56:48 +07:00 
						 
				 
			
				
					
						
							
							
								Jim Geovedi 
							
						 
					 
					
						
						
						
						
							
						
						
							f6f15678fb 
							
						 
					 
					
						
						
							
							added lex_attrs  
						
						
						
					 
					
						2017-07-23 22:55:22 +07:00 
						 
				 
			
				
					
						
							
							
								Jim Geovedi 
							
						 
					 
					
						
						
						
						
							
						
						
							bed8162d00 
							
						 
					 
					
						
						
							
							added tokenizer_exceptions  
						
						
						
					 
					
						2017-07-23 22:55:05 +07:00 
						 
				 
			
				
					
						
							
							
								Jim Geovedi 
							
						 
					 
					
						
						
						
						
							
						
						
							b80c35bc9a 
							
						 
					 
					
						
						
							
							added norm_exceptions  
						
						
						
					 
					
						2017-07-23 22:54:49 +07:00 
						 
				 
			
				
					
						
							
							
								Jim Geovedi 
							
						 
					 
					
						
						
						
						
							
						
						
							b5de329ea3 
							
						 
					 
					
						
						
							
							added norm_exceptions  
						
						
						
					 
					
						2017-07-23 22:54:19 +07:00 
						 
				 
			
				
					
						
							
							
								Jim Geovedi 
							
						 
					 
					
						
						
						
						
							
						
						
							082e9ade46 
							
						 
					 
					
						
						
							
							fixed typo  
						
						
						
					 
					
						2017-07-23 21:30:34 +07:00 
						 
				 
			
				
					
						
							
							
								Jim Geovedi 
							
						 
					 
					
						
						
						
						
							
						
						
							e2efeb186e 
							
						 
					 
					
						
						
							
							added stopwords  
						
						
						
					 
					
						2017-07-23 20:52:37 +07:00 
						 
				 
			
				
					
						
							
							
								Jim Geovedi 
							
						 
					 
					
						
						
						
						
							
						
						
							da98676839 
							
						 
					 
					
						
						
							
							use template  
						
						
						
					 
					
						2017-07-23 20:51:31 +07:00 
						 
				 
			
				
					
						
							
							
								Jim Geovedi 
							
						 
					 
					
						
						
						
						
							
						
						
							c2b4dd7809 
							
						 
					 
					
						
						
							
							start working on Indonesian language  
						
						
						
					 
					
						2017-07-23 20:50:56 +07:00 
						 
				 
			
				
					
						
							
							
								mollerhoj 
							
						 
					 
					
						
						
						
						
							
						
						
							85144835da 
							
						 
					 
					
						
						
							
							Add Tag_map for Danish  
						
						
						
					 
					
						2017-07-03 15:52:55 +02:00 
						 
				 
			
				
					
						
							
							
								mollerhoj 
							
						 
					 
					
						
						
						
						
							
						
						
							64c732918a 
							
						 
					 
					
						
						
							
							Add Morph_rules. (TODO: Not working?)  
						
						
						
					 
					
						2017-07-03 15:52:55 +02:00 
						 
				 
			
				
					
						
							
							
								mollerhoj 
							
						 
					 
					
						
						
						
						
							
						
						
							3b2cb107a3 
							
						 
					 
					
						
						
							
							Add like_num functionality to Danish  
						
						
						
					 
					
						2017-07-03 15:49:51 +02:00 
						 
				 
			
				
					
						
							
							
								mollerhoj 
							
						 
					 
					
						
						
						
						
							
						
						
							e8f40ceed8 
							
						 
					 
					
						
						
							
							Add short names of months to tokenizer_exceptions  
						
						
						
					 
					
						2017-07-03 15:49:51 +02:00 
						 
				 
			
				
					
						
							
							
								mollerhoj 
							
						 
					 
					
						
						
						
						
							
						
						
							23025d3b05 
							
						 
					 
					
						
						
							
							Clean up a couple of strange English stopwords  
						
						
						
					 
					
						2017-07-03 15:41:59 +02:00 
						 
				 
			
				
					
						
							
							
								mollerhoj 
							
						 
					 
					
						
						
						
						
							
						
						
							dc5be7d2f3 
							
						 
					 
					
						
						
							
							Cleanup list of Danish stopwords  
						
						
						
					 
					
						2017-07-03 15:40:58 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							c91642efd5 
							
						 
					 
					
						
						
							
							Port over changes from  #1168  
						
						
						
					 
					
						2017-07-01 11:43:54 +02:00 
						 
				 
			
				
					
						
							
							
								Jim O'Regan 
							
						 
					 
					
						
						
						
						
							
						
						
							70f4d26c10 
							
						 
					 
					
						
						
							
							bounds checks  
						
						
						
					 
					
						2017-06-28 10:59:46 +01:00 
						 
				 
			
				
					
						
							
							
								Jim O'Regan 
							
						 
					 
					
						
						
						
						
							
						
						
							1ba38b2036 
							
						 
					 
					
						
						
							
							some helpers; the Irish part of UD only has 2500 sentences so this will need source of morphology  
						
						
						
					 
					
						2017-06-28 00:42:00 +01:00 
						 
				 
			
				
					
						
							
							
								Jim O'Regan 
							
						 
					 
					
						
						
						
						
							
						
						
							559e03605a 
							
						 
					 
					
						
						
							
							b'  
						
						
						
					 
					
						2017-06-27 22:42:16 +01:00 
						 
				 
			
				
					
						
							
							
								Jim Regan 
							
						 
					 
					
						
						
						
						
							
						
						
							d81ceb0cd5 
							
						 
					 
					
						
						
							
							Merge branch 'develop' into polish  
						
						
						
					 
					
						2017-06-26 22:42:27 +01:00 
						 
				 
			
				
					
						
							
							
								Jim O'Regan 
							
						 
					 
					
						
						
						
						
							
						
						
							2f84c73585 
							
						 
					 
					
						
						
							
							a start  
						
						
						
					 
					
						2017-06-26 22:40:04 +01:00 
						 
				 
			
				
					
						
							
							
								Jim O'Regan 
							
						 
					 
					
						
						
						
						
							
						
						
							28d7f0a672 
							
						 
					 
					
						
						
							
							reference  
						
						
						
					 
					
						2017-06-26 22:38:28 +01:00 
						 
				 
			
				
					
						
							
							
								Jim O'Regan 
							
						 
					 
					
						
						
						
						
							
						
						
							e12defdd9c 
							
						 
					 
					
						
						
							
							missed a couple  
						
						
						
					 
					
						2017-06-26 22:24:14 +01:00 
						 
				 
			
				
					
						
							
							
								Jim O'Regan 
							
						 
					 
					
						
						
						
						
							
						
						
							c1e4e0f3bf 
							
						 
					 
					
						
						
							
							just now discovered that you can do multiwords  
						
						
						
					 
					
						2017-06-26 22:19:39 +01:00 
						 
				 
			
				
					
						
							
							
								Jim O'Regan 
							
						 
					 
					
						
						
						
						
							
						
						
							5e5f94c1c0 
							
						 
					 
					
						
						
							
							fix dup  
						
						
						
					 
					
						2017-06-26 21:57:00 +01:00 
						 
				 
			
				
					
						
							
							
								Jim O'Regan 
							
						 
					 
					
						
						
						
						
							
						
						
							a8dff9133e 
							
						 
					 
					
						
						
							
							add POS  
						
						
						
					 
					
						2017-06-26 21:53:41 +01:00 
						 
				 
			
				
					
						
							
							
								Jim O'Regan 
							
						 
					 
					
						
						
						
						
							
						
						
							e9213f54de 
							
						 
					 
					
						
						
							
							missed one  
						
						
						
					 
					
						2017-06-26 21:29:21 +01:00 
						 
				 
			
				
					
						
							
							
								Jim O'Regan 
							
						 
					 
					
						
						
						
						
							
						
						
							1eb7cc3017 
							
						 
					 
					
						
						
							
							attempt a port from  #1147  
						
						
						
					 
					
						2017-06-26 21:24:55 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							91e52543ef 
							
						 
					 
					
						
						
							
							Merge pull request  #1118  from Gregory-Howard/patch-2  
						
						... 
						
						
						
						Update _tokenizer_exceptions_list (adding cities) 
						
					 
					
						2017-06-20 11:16:07 +02:00 
						 
				 
			
				
					
						
							
							
								Tpt 
							
						 
					 
					
						
						
						
						
							
						
						
							7745b3ae04 
							
						 
					 
					
						
						
							
							Adds noun chunks to French syntax iterators  
						
						
						
					 
					
						2017-06-12 15:29:58 +02:00 
						 
				 
			
				
					
						
							
							
								Grégory Howard 
							
						 
					 
					
						
						
						
						
							
						
						
							cd974b32b7 
							
						 
					 
					
						
						
							
							Update _tokenizer_exceptions_list (adding cities)  
						
						
						
					 
					
						2017-06-09 17:58:18 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							55d0621532 
							
						 
					 
					
						
						
							
							Merge branch 'develop' of  https://github.com/explosion/spaCy  into develop  
						
						
						
					 
					
						2017-06-04 15:53:25 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e28f90b672 
							
						 
					 
					
						
						
							
							Fix syntax iterators  
						
						
						
					 
					
						2017-06-04 15:51:50 -05:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							112c5787eb 
							
						 
					 
					
						
						
							
							Merge pull request  #1101  from oroszgy/hu_tokenizer_fix  
						
						... 
						
						
						
						More robust Hungarian tokenizer. 
						
					 
					
						2017-06-04 22:37:51 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							9254a3dd78 
							
						 
					 
					
						
						
							
							Import and add Spanish syntax iterators  
						
						
						
					 
					
						2017-06-04 21:42:15 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							7ca215bc26 
							
						 
					 
					
						
						
							
							Resolve lex_attr_getters conflict  
						
						
						
					 
					
						2017-06-03 16:12:01 -05:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							4c643d74c5 
							
						 
					 
					
						
						
							
							Add norm exceptions to other Language classes  
						
						
						
					 
					
						2017-06-03 22:29:21 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							fa7e576c57 
							
						 
					 
					
						
						
							
							Change order of exception dicts  
						
						
						
					 
					
						2017-06-03 21:52:06 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3f5c85d8de 
							
						 
					 
					
						
						
							
							Reorder setting of lex attrs, to avoid clobbering  
						
						
						
					 
					
						2017-06-03 14:47:55 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							aeb7520133 
							
						 
					 
					
						
						
							
							Make norm use lower-case  
						
						
						
					 
					
						2017-06-03 14:47:38 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							de3954843e 
							
						 
					 
					
						
						
							
							Populate norm exceptions with lower-case  
						
						
						
					 
					
						2017-06-03 14:47:12 -05:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							e47eef5e03 
							
						 
					 
					
						
						
							
							Update German tokenizer exceptions and tests  
						
						
						
					 
					
						2017-06-03 21:07:44 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							0d6fa8b241 
							
						 
					 
					
						
						
							
							Add German norm exceptions  
						
						
						
					 
					
						2017-06-03 20:54:18 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							5bd311c77e 
							
						 
					 
					
						
						
							
							Fix update of norm exceptions  
						
						
						
					 
					
						2017-06-03 20:54:09 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							746653880c 
							
						 
					 
					
						
						
							
							Add English norm exceptions to lex_attrs  
						
						
						
					 
					
						2017-06-03 20:27:28 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							095eeeb12f 
							
						 
					 
					
						
						
							
							Update English tokenizer exceptions and add norms  
						
						
						
					 
					
						2017-06-03 20:27:16 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							e5d426406a 
							
						 
					 
					
						
						
							
							Add base norm exceptions  
						
						
						
					 
					
						2017-06-03 20:27:05 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							2f1025a94c 
							
						 
					 
					
						
						
							
							Port over Spanish changes from  #1096  
						
						
						
					 
					
						2017-06-02 19:09:58 +02:00 
						 
				 
			
				
					
						
							
							
								Gyorgy Orosz 
							
						 
					 
					
						
						
						
						
							
						
						
							f0c3b09242 
							
						 
					 
					
						
						
							
							More robust Hungarian tokenizer.  
						
						
						
					 
					
						2017-05-31 22:28:40 +02:00 
						 
				 
			
				
					
						
							
							
								Gyorgy Orosz 
							
						 
					 
					
						
						
						
						
							
						
						
							8c0b4b850e 
							
						 
					 
					
						
						
							
							Fixed emoji handling for Hungarian  
						
						
						
					 
					
						2017-05-30 21:34:46 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							84189c1cab 
							
						 
					 
					
						
						
							
							Add 'xx' language ID for multi-language support  
						
						... 
						
						
						
						Allows models to specify their language ID as 'xx'. 
						
					 
					
						2017-05-28 00:58:59 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							33e332e67c 
							
						 
					 
					
						
						
							
							Remove unused export  
						
						
						
					 
					
						2017-05-28 00:57:59 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							a8e58e04ef 
							
						 
					 
					
						
						
							
							Add symbols class to punctuation rules to handle emoji (see  #1088 )  
						
						... 
						
						
						
						Currently doesn't work for Hungarian, because of conflicts with the
custom punctuation rules. Also doesn't take multi-character emoji like
👩🏽💻  into account. 
						
					 
					
						2017-05-27 17:57:10 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5db89053aa 
							
						 
					 
					
						
						
							
							Merge docstrings  
						
						
						
					 
					
						2017-05-21 13:46:23 -05:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							924e8506de 
							
						 
					 
					
						
						
							
							Move Defaults subclass to module scope (necessary for pickling)  
						
						
						
					 
					
						2017-05-20 19:02:27 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							61fe55efba 
							
						 
					 
					
						
						
							
							Move EnglishDefaults class out of English  
						
						
						
					 
					
						2017-05-20 02:18:19 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8815507f8e 
							
						 
					 
					
						
						
							
							Move SpanishDefaults out of Language class, for pickle  
						
						
						
					 
					
						2017-05-18 04:28:51 -05:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							1a05078c79 
							
						 
					 
					
						
						
							
							Add language-specific syntax iterators to en and de  
						
						
						
					 
					
						2017-05-17 12:04:03 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4b9d69f428 
							
						 
					 
					
						
						
							
							Merge branch 'v2' into develop  
						
						... 
						
						
						
						* Move v2 parser into nn_parser.pyx
* New TokenVectorEncoder class in pipeline.pyx
* New spacy/_ml.py module
Currently the two parsers live side-by-side, until we figure out how to
organize them. 
						
					 
					
						2017-05-14 01:10:23 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							a4a37a783e 
							
						 
					 
					
						
						
							
							Remove import from non-existing module  
						
						
						
					 
					
						2017-05-13 16:00:09 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							c13b3fa052 
							
						 
					 
					
						
						
							
							Add LEX_ATTRS  
						
						
						
					 
					
						2017-05-12 15:37:45 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							bca2ea9c72 
							
						 
					 
					
						
						
							
							Update Portuguese lexical attributes  
						
						
						
					 
					
						2017-05-12 15:37:39 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							2f870123bf 
							
						 
					 
					
						
						
							
							Fix formatting  
						
						
						
					 
					
						2017-05-12 15:37:20 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							ca65993d59 
							
						 
					 
					
						
						
							
							Add basic Polish Language class  
						
						
						
					 
					
						2017-05-12 09:25:37 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							48177c4f92 
							
						 
					 
					
						
						
							
							Add missing tokenizer exceptions  
						
						
						
					 
					
						2017-05-12 09:25:24 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							bb8be3d194 
							
						 
					 
					
						
						
							
							Add Danish language data  
						
						
						
					 
					
						2017-05-10 21:15:12 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							a0b00624bb 
							
						 
					 
					
						
						
							
							Make sure like_email returns bool  
						
						
						
					 
					
						2017-05-09 11:37:29 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							ea60932e1b 
							
						 
					 
					
						
						
							
							Fix formatting  
						
						
						
					 
					
						2017-05-09 11:08:14 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							02d0ac5cab 
							
						 
					 
					
						
						
							
							Remove redundant function and fix formatting  
						
						
						
					 
					
						2017-05-09 11:06:04 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							b5ca50607e 
							
						 
					 
					
						
						
							
							Reorganise entity rules  
						
						
						
					 
					
						2017-05-09 01:37:10 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							12c3d5fbba 
							
						 
					 
					
						
						
							
							Fix formatting  
						
						
						
					 
					
						2017-05-09 01:15:28 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							2829a024ef 
							
						 
					 
					
						
						
							
							Re-add basic like_num check to global lex_attrs  
						
						
						
					 
					
						2017-05-09 01:15:23 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							88adeee548 
							
						 
					 
					
						
						
							
							Add English lex_attrs overrides  
						
						
						
					 
					
						2017-05-09 01:09:52 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							8f3fbbb147 
							
						 
					 
					
						
						
							
							Fix typos  
						
						
						
					 
					
						2017-05-09 01:09:37 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							2216e5f326 
							
						 
					 
					
						
						
							
							Reorganise lex_attrs and add dict  
						
						
						
					 
					
						2017-05-09 00:57:54 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							e666f14d20 
							
						 
					 
					
						
						
							
							Add global lex_attrs  
						
						
						
					 
					
						2017-05-09 00:41:53 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							41972c43fe 
							
						 
					 
					
						
						
							
							Use consistent regex imports  
						
						
						
					 
					
						2017-05-09 00:34:31 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							9f0fd5963f 
							
						 
					 
					
						
						
							
							Reorganise Hungarian punctuation rules  
						
						
						
					 
					
						2017-05-09 00:01:59 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							fc0d793360 
							
						 
					 
					
						
						
							
							Reorganise Bengali punctuation rules  
						
						
						
					 
					
						2017-05-09 00:01:52 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							e895d1afd7 
							
						 
					 
					
						
						
							
							Reorganise French punctuation rules  
						
						
						
					 
					
						2017-05-09 00:00:54 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							014bda0ae3 
							
						 
					 
					
						
						
							
							Reorganise global punctuation rules  
						
						
						
					 
					
						2017-05-09 00:00:46 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							a91278cb32 
							
						 
					 
					
						
						
							
							Rename _URL_PATTERN to URL_PATTERN  
						
						
						
					 
					
						2017-05-09 00:00:00 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							604f299cf6 
							
						 
					 
					
						
						
							
							Add char classes to global language data  
						
						
						
					 
					
						2017-05-08 23:59:33 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							f6f5d78cb9 
							
						 
					 
					
						
						
							
							Fix formatting  
						
						
						
					 
					
						2017-05-08 23:59:17 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							3c0f85de8e 
							
						 
					 
					
						
						
							
							Remove imports in /lang/__init__.py  
						
						
						
					 
					
						2017-05-08 23:58:07 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							614aa09582 
							
						 
					 
					
						
						
							
							Tidy up Bengali tokenizer exceptions  
						
						
						
					 
					
						2017-05-08 22:29:49 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							73b577cb01 
							
						 
					 
					
						
						
							
							Fix relative imports  
						
						
						
					 
					
						2017-05-08 22:29:04 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							ae99990f63 
							
						 
					 
					
						
						
							
							Fix formatting  
						
						
						
					 
					
						2017-05-08 22:23:48 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							f46ffe3e89 
							
						 
					 
					
						
						
							
							Move language data to /lang module  
						
						
						
					 
					
						2017-05-08 20:00:40 +02:00