Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							cb0e727c54 
							
						 
					 
					
						
						
							
							Merge pull request  #1415  from IamJeffG/fix-alpha-example-train-ner-standalone  
						
						... 
						
						
						
						Bugfix example script train_ner_standalone.py, fails after training 
						
					 
					
						2017-10-12 21:44:28 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							9c6de3dcfa 
							
						 
					 
					
						
						
							
							Merge branch 'develop' into feature/cli-validate  
						
						
						
					 
					
						2017-10-12 21:44:28 +02:00 
						 
				 
			
				
					
						
							
							
								Jeffrey Gerard 
							
						 
					 
					
						
						
						
						
							
						
						
							5ba970b495 
							
						 
					 
					
						
						
							
							minor cleanup  
						
						
						
					 
					
						2017-10-12 12:34:46 -07:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							462caf835a 
							
						 
					 
					
						
						
							
							Fix SBD test  
						
						
						
					 
					
						2017-10-12 21:18:22 +02:00 
						 
				 
			
				
					
						
							
							
								Jeffrey Gerard 
							
						 
					 
					
						
						
						
						
							
						
						
							39d3cbfdba 
							
						 
					 
					
						
						
							
							Bugfix example script train_ner_standalone.py, fails after training  
						
						
						
					 
					
						2017-10-12 11:39:12 -07:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							fff1028391 
							
						 
					 
					
						
						
							
							Add validate CLI command  
						
						
						
					 
					
						2017-10-12 20:05:06 +02:00 
						 
				 
			
				
					
						
							
							
								yuukos 
							
						 
					 
					
						
						
						
						
							
						
						
							f81dd284eb 
							
						 
					 
					
						
						
							
							updated spacy/__init__.py  
						
						... 
						
						
						
						registered russian language via set_lang_class 
						
					 
					
						2017-10-12 22:28:34 +07:00 
						 
				 
			
				
					
						
							
							
								yuukos 
							
						 
					 
					
						
						
						
						
							
						
						
							7b9491679f 
							
						 
					 
					
						
						
							
							added russian language support  
						
						
						
					 
					
						2017-10-12 22:24:20 +07:00 
						 
				 
			
				
					
						
							
							
								yuukos 
							
						 
					 
					
						
						
						
						
							
						
						
							2a78f4d634 
							
						 
					 
					
						
						
							
							updated .gitignore file  
						
						... 
						
						
						
						added excluding PyCharm's idea directory 
						
					 
					
						2017-10-12 22:23:19 +07:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							908f44c3fe 
							
						 
					 
					
						
						
							
							Disable history features by default  
						
						
						
					 
					
						2017-10-12 14:56:11 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a955843684 
							
						 
					 
					
						
						
							
							Increase default number of epochs  
						
						
						
					 
					
						2017-10-12 13:13:01 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							cecfcc7711 
							
						 
					 
					
						
						
							
							Set default hyper params back to 'slow' settings  
						
						
						
					 
					
						2017-10-12 13:12:26 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							37aa523a8e 
							
						 
					 
					
						
						
							
							Merge pull request  #1408  from explosion/feature/dot-underscore  
						
						... 
						
						
						
						💫  Custom attributes via Doc._, Token._ and Span._ 
					
						2017-10-11 18:35:56 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							40dbc85ffa 
							
						 
					 
					
						
						
							
							Merge pull request  #1413  from explosion/feature/lemmatizer  
						
						... 
						
						
						
						💫   Integrate lookup lemmatization (9+ languages) 
					
						2017-10-11 17:54:36 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							8ce6f96180 
							
						 
					 
					
						
						
							
							Don't make copies of language data components  
						
						
						
					 
					
						2017-10-11 15:34:55 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							a06b84e7cc 
							
						 
					 
					
						
						
							
							Merge pull request  #1407  from hscspring/patch-6  
						
						... 
						
						
						
						Update training.jade 
						
					 
					
						2017-10-11 14:25:38 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							eac9e99086 
							
						 
					 
					
						
						
							
							Update docs on adding lemmatization to languages  
						
						
						
					 
					
						2017-10-11 14:21:15 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							51519251c2 
							
						 
					 
					
						
						
							
							Fix underscore method test  
						
						
						
					 
					
						2017-10-11 13:34:19 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							c6ae49e8bf 
							
						 
					 
					
						
						
							
							Fix formatting  
						
						
						
					 
					
						2017-10-11 13:34:11 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							453c47ca24 
							
						 
					 
					
						
						
							
							Add German lemmatizer tests  
						
						
						
					 
					
						2017-10-11 13:27:26 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							15fe0fd82d 
							
						 
					 
					
						
						
							
							Fix tests  
						
						
						
					 
					
						2017-10-11 13:27:18 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							6dd14dc342 
							
						 
					 
					
						
						
							
							Add lookup lemmas to tokens without POS tags  
						
						
						
					 
					
						2017-10-11 13:27:10 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							9620c1a640 
							
						 
					 
					
						
						
							
							Add lemma_lookup to Language defaults  
						
						
						
					 
					
						2017-10-11 13:26:05 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							9fd471372a 
							
						 
					 
					
						
						
							
							Add lookup lemmatizer to lemmatizer as lookup() method  
						
						
						
					 
					
						2017-10-11 13:25:51 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							e0ff145a8b 
							
						 
					 
					
						
						
							
							Merge branch 'develop' into feature/dot-underscore  
						
						
						
					 
					
						2017-10-11 11:57:05 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							c1d6d43c83 
							
						 
					 
					
						
						
							
							Merge branch 'develop' into feature/lemmatizer  
						
						
						
					 
					
						2017-10-11 11:56:35 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							ffc2fef13c 
							
						 
					 
					
						
						
							
							Merge pull request  #1411  from raphael0202/issue_1078  
						
						... 
						
						
						
						Resolve issue #1078  by simplifying URL pattern 
						
					 
					
						2017-10-11 11:54:57 +02:00 
						 
				 
			
				
					
						
							
							
								Raphaël Bournhonesque 
							
						 
					 
					
						
						
						
						
							
						
						
							3452d6ce52 
							
						 
					 
					
						
						
							
							Resolve issue  #1078  by simplifying URL pattern  
						
						... 
						
						
						
						- avoid catastrophic backtracking
- reduce character range of host name, domain name and TLD identifier 
						
					 
					
						2017-10-11 11:24:00 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							17c467e0ab 
							
						 
					 
					
						
						
							
							Avoid clobbering existing lemmas  
						
						
						
					 
					
						2017-10-11 03:33:06 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							807e109f2b 
							
						 
					 
					
						
						
							
							Merge branch 'develop' of  https://github.com/explosion/spaCy  into develop  
						
						
						
					 
					
						2017-10-11 02:47:59 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6e552c9d83 
							
						 
					 
					
						
						
							
							Prune number of non-projective labels more aggressiely  
						
						
						
					 
					
						2017-10-11 02:46:44 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							76fe24f44d 
							
						 
					 
					
						
						
							
							Improve embedding defaults  
						
						
						
					 
					
						2017-10-11 09:44:17 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							188f620046 
							
						 
					 
					
						
						
							
							Improve parser defaults  
						
						
						
					 
					
						2017-10-11 09:43:48 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							acba2e1051 
							
						 
					 
					
						
						
							
							Fix metadata in training  
						
						
						
					 
					
						2017-10-11 08:55:52 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							74c2c6a58c 
							
						 
					 
					
						
						
							
							Add default name and lang to meta  
						
						
						
					 
					
						2017-10-11 08:49:12 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3814a161e6 
							
						 
					 
					
						
						
							
							Avoid clobbering preset lemmas  
						
						
						
					 
					
						2017-10-11 08:41:03 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							fd47f8e89f 
							
						 
					 
					
						
						
							
							Fix failing test  
						
						
						
					 
					
						2017-10-11 08:38:34 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							462b2e26b4 
							
						 
					 
					
						
						
							
							Merge branch 'develop' of  https://github.com/explosion/spaCy  into develop  
						
						
						
					 
					
						2017-10-11 08:23:04 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a6ac4699eb 
							
						 
					 
					
						
						
							
							Allow Morphology class to setup tokens  
						
						... 
						
						
						
						Add Morphology.assign_untagged() C-method, and call it from
Doc.push_back() when a token is created. This gives a place
to allow the Morphology class to initialize token data. 
						
					 
					
						2017-10-11 03:24:14 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3b527fa52b 
							
						 
					 
					
						
						
							
							Call morphology.assign_untagged when pushing token to Doc  
						
						
						
					 
					
						2017-10-11 03:23:57 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c15d8278cb 
							
						 
					 
					
						
						
							
							Avoid lemmatizing inappropriate tags in English lemmatizer  
						
						
						
					 
					
						2017-10-11 03:23:23 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							d528b6e36d 
							
						 
					 
					
						
						
							
							Add assign_untagged method in Morphology  
						
						
						
					 
					
						2017-10-11 03:22:49 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2c118ab3a6 
							
						 
					 
					
						
						
							
							Add tests for Doc creation  
						
						
						
					 
					
						2017-10-11 03:21:23 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							f4ae6763b9 
							
						 
					 
					
						
						
							
							Fix consistency of imports from spacy.tokens in examples  
						
						
						
					 
					
						2017-10-11 02:30:40 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							820bf85075 
							
						 
					 
					
						
						
							
							Move LookupLemmatizer to spacy.lemmatizer  
						
						
						
					 
					
						2017-10-11 02:25:13 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							417d45f5d0 
							
						 
					 
					
						
						
							
							Add lemmatizer data as variable on language data  
						
						... 
						
						
						
						Don't create lookup lemmatizer within Language class and just pass in
the data so it can be set on Token creation 
						
					 
					
						2017-10-11 02:24:58 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							0c2343d73a 
							
						 
					 
					
						
						
							
							Tidy up language data  
						
						
						
					 
					
						2017-10-11 02:22:49 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							d84136b4a9 
							
						 
					 
					
						
						
							
							Update add label test  
						
						
						
					 
					
						2017-10-10 22:57:41 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3065f12ef2 
							
						 
					 
					
						
						
							
							Make add parser label work for hidden_depth=0  
						
						
						
					 
					
						2017-10-10 22:57:31 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							bfd58dd0fc 
							
						 
					 
					
						
						
							
							Merge branch 'develop' into feature/dot-underscore  
						
						
						
					 
					
						2017-10-10 22:03:51 +02:00