Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							39798b0172 
							
						 
					 
					
						
						
							
							Uncomment layernorm adjustment hack  
						
						 
						
						
						
					 
					
						2017-10-04 15:12:09 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							774f5732bd 
							
						 
					 
					
						
						
							
							Fix dimensionality of textcat when no vectors available  
						
						 
						
						
						
					 
					
						2017-10-04 14:55:15 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							af75b74208 
							
						 
					 
					
						
						
							
							Unset LayerNorm backwards compat hack  
						
						 
						
						
						
					 
					
						2017-10-03 20:47:10 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							246612cb53 
							
						 
					 
					
						
						
							
							Merge remote-tracking branch 'origin/develop' into feature/parser-history-model  
						
						 
						
						
						
					 
					
						2017-10-03 16:56:42 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5cbefcba17 
							
						 
					 
					
						
						
							
							Set backwards compatibility flag  
						
						 
						
						
						
					 
					
						2017-10-03 20:29:58 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5454b20cd7 
							
						 
					 
					
						
						
							
							Update thinc imports for 6.9  
						
						 
						
						
						
					 
					
						2017-10-03 20:07:17 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e514d6aa0a 
							
						 
					 
					
						
						
							
							Import thinc modules more explicitly, to avoid cycles  
						
						 
						
						
						
					 
					
						2017-10-03 18:49:25 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b770f4e108 
							
						 
					 
					
						
						
							
							Fix embed class in history features  
						
						 
						
						
						
					 
					
						2017-10-03 13:26:55 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6aa6a5bc25 
							
						 
					 
					
						
						
							
							Add a layer type for history features  
						
						 
						
						
						
					 
					
						2017-10-03 12:43:09 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f6330d69e6 
							
						 
					 
					
						
						
							
							Default embed size to 7000  
						
						 
						
						
						
					 
					
						2017-09-28 08:07:41 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							1a37a2c0a0 
							
						 
					 
					
						
						
							
							Update training defaults  
						
						 
						
						
						
					 
					
						2017-09-27 11:48:07 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e34e70673f 
							
						 
					 
					
						
						
							
							Allow tagger models to be built with pre-defined tok2vec layer  
						
						 
						
						
						
					 
					
						2017-09-26 05:51:52 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							63bd87508d 
							
						 
					 
					
						
						
							
							Don't use iterated convolutions  
						
						 
						
						
						
					 
					
						2017-09-23 04:39:17 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4348c479fc 
							
						 
					 
					
						
						
							
							Merge pre-trained vectors and noshare patches  
						
						 
						
						
						
					 
					
						2017-09-22 20:07:28 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4bd6a12b1f 
							
						 
					 
					
						
						
							
							Fix Tok2Vec  
						
						 
						
						
						
					 
					
						2017-09-23 02:58:54 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							980fb6e854 
							
						 
					 
					
						
						
							
							Refactor Tok2Vec  
						
						 
						
						
						
					 
					
						2017-09-22 09:38:36 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							d9124f1aa3 
							
						 
					 
					
						
						
							
							Add link_vectors_to_models function  
						
						 
						
						
						
					 
					
						2017-09-22 09:38:22 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a186596307 
							
						 
					 
					
						
						
							
							Add 'reapply' combinator, for iterated CNN  
						
						 
						
						
						
					 
					
						2017-09-22 09:37:03 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							40a4873b70 
							
						 
					 
					
						
						
							
							Fix serialization of model options  
						
						 
						
						
						
					 
					
						2017-09-21 13:07:26 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							20193371f5 
							
						 
					 
					
						
						
							
							Don't share CNN, to reduce complexities  
						
						 
						
						
						
					 
					
						2017-09-21 14:59:48 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f5144f04be 
							
						 
					 
					
						
						
							
							Add argument for CNN maxout pieces  
						
						 
						
						
						
					 
					
						2017-09-20 19:14:41 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							78301b2d29 
							
						 
					 
					
						
						
							
							Avoid comparison to None in Tok2Vec  
						
						 
						
						
						
					 
					
						2017-09-20 00:19:34 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3fa76c17d1 
							
						 
					 
					
						
						
							
							Refactor Tok2Vec  
						
						 
						
						
						
					 
					
						2017-09-18 15:00:05 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							7b3f391f80 
							
						 
					 
					
						
						
							
							Try dropping the Affine layer, conditionally  
						
						 
						
						
						
					 
					
						2017-09-18 11:35:59 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2148ae605b 
							
						 
					 
					
						
						
							
							Dont use iterated convolutions  
						
						 
						
						
						
					 
					
						2017-09-17 17:36:04 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8f42f8d305 
							
						 
					 
					
						
						
							
							Remove unused 'preprocess' argument in Tok2Vec'  
						
						 
						
						
						
					 
					
						2017-09-17 12:30:16 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8f913a74ca 
							
						 
					 
					
						
						
							
							Fix defaults and args to build_tagger_model  
						
						 
						
						
						
					 
					
						2017-09-17 05:46:36 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2a93404da6 
							
						 
					 
					
						
						
							
							Support optional pre-trained vectors in tensorizer model  
						
						 
						
						
						
					 
					
						2017-09-16 12:45:37 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							24ff6b0ad9 
							
						 
					 
					
						
						
							
							Fix parsing and tok2vec models  
						
						 
						
						
						
					 
					
						2017-09-06 05:50:58 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							16e25ce3b5 
							
						 
					 
					
						
						
							
							Merge branch 'develop' of  https://github.com/explosion/spaCy  into develop  
						
						 
						
						
						
					 
					
						2017-09-04 09:26:53 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9f512e657a 
							
						 
					 
					
						
						
							
							Fix drop_layer calculation  
						
						 
						
						
						
					 
					
						2017-09-04 09:26:38 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c0eaba8b28 
							
						 
					 
					
						
						
							
							Fix low-data textcat  
						
						 
						
						
						
					 
					
						2017-09-02 15:17:32 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a3b69bcb3d 
							
						 
					 
					
						
						
							
							Add low_data mode in textcat  
						
						 
						
						
						
					 
					
						2017-09-02 14:56:30 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a824cf8f9a 
							
						 
					 
					
						
						
							
							Adjust text classification model  
						
						 
						
						
						
					 
					
						2017-09-02 11:41:00 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ac040b99bb 
							
						 
					 
					
						
						
							
							Add support for pre-trained vectors in text classifier  
						
						 
						
						
						
					 
					
						2017-09-01 16:39:55 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6d4e8e14ca 
							
						 
					 
					
						
						
							
							Merge branch 'develop' of  https://github.com/explosion/spaCy  into develop  
						
						 
						
						
						
					 
					
						2017-08-25 12:37:16 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4ce5531389 
							
						 
					 
					
						
						
							
							Use layer norm instead of batch norm  
						
						 
						
						
						
					 
					
						2017-08-25 12:37:10 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							1c5c256e58 
							
						 
					 
					
						
						
							
							Fix fine_tune when optimizer is None  
						
						 
						
						
						
					 
					
						2017-08-23 10:51:33 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9c580ad28a 
							
						 
					 
					
						
						
							
							Merge branch 'develop' of  https://github.com/explosion/spaCy  into develop  
						
						 
						
						
						
					 
					
						2017-08-22 17:02:04 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a4633fff6f 
							
						 
					 
					
						
						
							
							Restore use of batch norm in model  
						
						 
						
						
						
					 
					
						2017-08-22 17:01:58 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							df2745eb08 
							
						 
					 
					
						
						
							
							Merge branch 'develop' of  https://github.com/explosion/spaCy  into develop  
						
						 
						
						
						
					 
					
						2017-08-22 19:00:43 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							18b64e79ec 
							
						 
					 
					
						
						
							
							Fix fine tuning  
						
						 
						
						
						
					 
					
						2017-08-21 19:18:26 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a21d8f3f0b 
							
						 
					 
					
						
						
							
							Add predict paths to _ml models  
						
						 
						
						
						
					 
					
						2017-08-21 23:23:45 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							80acbc5f1f 
							
						 
					 
					
						
						
							
							Fix fine-tune weight mixture  
						
						 
						
						
						
					 
					
						2017-08-21 14:15:29 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c10f63bf10 
							
						 
					 
					
						
						
							
							Initialize fine tuning to 0.5  
						
						 
						
						
						
					 
					
						2017-08-20 15:59:48 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8a59718fd6 
							
						 
					 
					
						
						
							
							Fix fine-tuning  
						
						 
						
						
						
					 
					
						2017-08-20 18:17:35 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							bae59bf92f 
							
						 
					 
					
						
						
							
							Remove BiLSTM import  
						
						 
						
						
						
					 
					
						2017-08-18 22:46:59 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							fe90dfc390 
							
						 
					 
					
						
						
							
							Restore changes from nn-beam-parser to spacy/_ml  
						
						 
						
						
						
					 
					
						2017-08-18 22:38:28 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ce321b0322 
							
						 
					 
					
						
						
							
							Restore changes from nn-beam-parser to spacy/_ml  
						
						 
						
						
						
					 
					
						2017-08-18 22:24:46 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							931509d96a 
							
						 
					 
					
						
						
							
							Merge branch 'develop' of  https://github.com/explosion/spaCy  into develop  
						
						 
						
						
						
					 
					
						2017-08-18 21:57:15 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							263366729e 
							
						 
					 
					
						
						
							
							Don't import BiLSTM  
						
						 
						
						
						
					 
					
						2017-08-18 21:56:31 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							85794c1167 
							
						 
					 
					
						
						
							
							Restore state of _ml.py  
						
						 
						
						
						
					 
					
						2017-08-18 14:55:23 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							426f84937f 
							
						 
					 
					
						
						
							
							Resolve conflicts when merging new beam parsing stuff  
						
						 
						
						
						
					 
					
						2017-08-18 13:38:32 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5181e8bedb 
							
						 
					 
					
						
						
							
							Fix merge conflict in _ml  
						
						 
						
						
						
					 
					
						2017-08-18 13:35:51 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4b1e7bd6d8 
							
						 
					 
					
						
						
							
							Improve tensorizer model  
						
						 
						
						
						
					 
					
						2017-08-16 18:25:20 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6259490347 
							
						 
					 
					
						
						
							
							Fix mixture weights in fine_tune  
						
						 
						
						
						
					 
					
						2017-08-14 17:55:18 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							335fa8b05c 
							
						 
					 
					
						
						
							
							Fix gradient in fine_tune  
						
						 
						
						
						
					 
					
						2017-08-14 14:55:47 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							52c180ecf5 
							
						 
					 
					
						
						
							
							Revert "Merge branch 'develop' of  https://github.com/explosion/spaCy  into develop"  
						
						 
						
						... 
						
						
						
						This reverts commit ea8de11ad5 , reversing
changes made to 08e443e083 . 
						
					 
					
						2017-08-14 13:00:23 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ac6c25f762 
							
						 
					 
					
						
						
							
							Check SGD is not None in update  
						
						 
						
						
						
					 
					
						2017-08-14 12:09:18 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4ab0c8c8e9 
							
						 
					 
					
						
						
							
							Try different drop_layer structure in Tok2Vec  
						
						 
						
						
						
					 
					
						2017-08-12 08:56:57 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ebe0f7f641 
							
						 
					 
					
						
						
							
							Pass embed size correctly in tagger, and cache embeddings for efficiency  
						
						 
						
						
						
					 
					
						2017-08-12 05:45:20 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f93f2bed58 
							
						 
					 
					
						
						
							
							Revert use of layer normalization in Tok2Vec  
						
						 
						
						
						
					 
					
						2017-08-09 17:47:03 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ac2de6dced 
							
						 
					 
					
						
						
							
							Switch to ReLu layers in Tok2Vec  
						
						 
						
						
						
					 
					
						2017-08-09 16:41:25 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							88bf1cf87c 
							
						 
					 
					
						
						
							
							Update parser for fine tuning  
						
						 
						
						
						
					 
					
						2017-08-08 15:34:17 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5d837c3776 
							
						 
					 
					
						
						
							
							Add mix weights on fine_tune  
						
						 
						
						
						
					 
					
						2017-08-07 06:32:59 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3ed203de25 
							
						 
					 
					
						
						
							
							Use LayerNorm and SELU in Tok2Vec  
						
						 
						
						
						
					 
					
						2017-08-06 18:33:18 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4a5cc89138 
							
						 
					 
					
						
						
							
							Fix tagger 'fine_tune', to keep private CNN weights  
						
						 
						
						
						
					 
					
						2017-08-06 14:15:48 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4cfb7a54e7 
							
						 
					 
					
						
						
							
							Fix tagger  
						
						 
						
						
						
					 
					
						2017-08-06 01:53:31 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e9ab800e15 
							
						 
					 
					
						
						
							
							Fix tagging model  
						
						 
						
						
						
					 
					
						2017-08-06 01:50:08 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							468c138ab3 
							
						 
					 
					
						
						
							
							WIP: Add fine-tuning logic to tagger model, re  #1182  
						
						 
						
						
						
					 
					
						2017-08-06 01:13:23 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							523b0df2c9 
							
						 
					 
					
						
						
							
							Update text classification model  
						
						 
						
						
						
					 
					
						2017-07-25 18:57:59 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2df563ad24 
							
						 
					 
					
						
						
							
							Remove optimization for textcat that caused loading problem  
						
						 
						
						
						
					 
					
						2017-07-23 14:10:51 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ded0df5e2f 
							
						 
					 
					
						
						
							
							Expose hyper-param as keyword arg  
						
						 
						
						
						
					 
					
						2017-07-22 20:14:37 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6ffec9dfea 
							
						 
					 
					
						
						
							
							Update _ml, for textcat model  
						
						 
						
						
						
					 
					
						2017-07-22 20:03:40 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							727481377e 
							
						 
					 
					
						
						
							
							Add text-classifer thinc models  
						
						 
						
						
						
					 
					
						2017-07-20 00:17:17 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8a17b99b1c 
							
						 
					 
					
						
						
							
							Use NORM attribute, not LOWER  
						
						 
						
						
						
					 
					
						2017-06-03 15:30:16 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b92a89f87b 
							
						 
					 
					
						
						
							
							Make it easier to reference embedding tables  
						
						 
						
						
						
					 
					
						2017-05-29 17:53:29 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c91b121aeb 
							
						 
					 
					
						
						
							
							Move serialization functions to util  
						
						 
						
						
						
					 
					
						2017-05-29 10:13:42 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							1fa2bfb600 
							
						 
					 
					
						
						
							
							Add model_to_bytes and model_from_bytes helpers. Probably belong in thinc.  
						
						 
						
						
						
					 
					
						2017-05-29 09:27:04 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6dad4117ad 
							
						 
					 
					
						
						
							
							Work on serialization for models  
						
						 
						
						
						
					 
					
						2017-05-29 01:37:57 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8de9829f09 
							
						 
					 
					
						
						
							
							Don't overwrite model in initialization, when loading  
						
						 
						
						
						
					 
					
						2017-05-27 15:50:40 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b27c587800 
							
						 
					 
					
						
						
							
							Fix pieces argument to PrecomputedMaxout  
						
						 
						
						
						
					 
					
						2017-05-25 06:46:59 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c998776c25 
							
						 
					 
					
						
						
							
							Make single array for features, to reduce GPU copies  
						
						 
						
						
						
					 
					
						2017-05-22 04:51:08 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8904814c0e 
							
						 
					 
					
						
						
							
							Add missing import  
						
						 
						
						
						
					 
					
						2017-05-21 09:07:56 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3b7c108246 
							
						 
					 
					
						
						
							
							Pass tokvecs through as a list, instead of concatenated. Also fix padding  
						
						 
						
						
						
					 
					
						2017-05-20 13:23:32 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b272890a8c 
							
						 
					 
					
						
						
							
							Try to move parser to simpler PrecomputedAffine class. Currently broken -- maybe the previous change  
						
						 
						
						
						
					 
					
						2017-05-20 06:40:10 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a438cef8c5 
							
						 
					 
					
						
						
							
							Fix significant bug in feature calculation -- off by 1  
						
						 
						
						
						
					 
					
						2017-05-18 06:21:32 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							711ad5edc4 
							
						 
					 
					
						
						
							
							Cache features in doc2feats  
						
						 
						
						
						
					 
					
						2017-05-18 04:22:20 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5211645af3 
							
						 
					 
					
						
						
							
							Get data flowing through pipeline. Needs redesign  
						
						 
						
						
						
					 
					
						2017-05-16 11:21:59 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a9edb3aa1d 
							
						 
					 
					
						
						
							
							Improve integration of NN parser, to support unified training API  
						
						 
						
						
						
					 
					
						2017-05-15 21:53:27 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							827b5af697 
							
						 
					 
					
						
						
							
							Update draft of parser neural network model  
						
						 
						
						... 
						
						
						
						Model is good, but code is messy. Currently requires Chainer, which may cause the build to fail on machines without a GPU.
Outline of the model:
We first predict context-sensitive vectors for each word in the input:
(embed_lower | embed_prefix | embed_suffix | embed_shape)
>> Maxout(token_width)
>> convolution ** 4
This convolutional layer is shared between the tagger and the parser. This prevents the parser from needing tag features.
To boost the representation, we make a "super tag" with POS, morphology and dependency label. The tagger predicts this
by adding a softmax layer onto the convolutional layer --- so, we're teaching the convolutional layer to give us a
representation that's one affine transform from this informative lexical information. This is obviously good for the
parser (which backprops to the convolutions too).
The parser model makes a state vector by concatenating the vector representations for its context tokens. Current
results suggest few context tokens work well. Maybe this is a bug.
The current context tokens:
* S0, S1, S2: Top three words on the stack
* B0, B1: First two words of the buffer
* S0L1, S0L2: Leftmost and second leftmost children of S0
* S0R1, S0R2: Rightmost and second rightmost children of S0
* S1L1, S1L2, S1R1, S1R2, B0L1, B0L2: Likewise for S1 and B0
This makes the state vector quite long: 13*T, where T is the token vector width (128 is working well). Fortunately,
there's a way to structure the computation to save some expense (and make it more GPU friendly).
The parser typically visits 2*N states for a sentence of length N (although it may visit more, if it back-tracks
with a non-monotonic transition). A naive implementation would require 2*N (B, 13*T) @ (13*T, H) matrix multiplications
for a batch of size B. We can instead perform one (B*N, T) @ (T, 13*H) multiplication, to pre-compute the hidden
weights for each positional feature wrt the words in the batch. (Note that our token vectors come from the CNN
-- so we can't play this trick over the vocabulary. That's how Stanford's NN parser works --- and why its model
is so big.)
This pre-computation strategy allows a nice compromise between GPU-friendliness and implementation simplicity.
The CNN and the wide lower layer are computed on the GPU, and then the precomputed hidden weights are moved
to the CPU, before we start the transition-based parsing process. This makes a lot of things much easier.
We don't have to worry about variable-length batch sizes, and we don't have to implement the dynamic oracle
in CUDA to train.
Currently the parser's loss function is multilabel log loss, as the dynamic oracle allows multiple states to
be 0 cost. This is defined as:
(exp(score) / Z) - (exp(score) / gZ)
Where Z is the sum of exp(score) over all classes, and gZ is the sum of exp(score) over the gold classes. I'm very interested in regressing on the cost directly,
but so far this isn't working well.
Machinery is in place for beam-search, which has been working well for the linear model. Beam search should benefit
greatly from the pre-computation trick. 
						
					 
					
						2017-05-12 16:09:15 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							bef89ef23d 
							
						 
					 
					
						
						
							
							Mergery  
						
						 
						
						
						
					 
					
						2017-05-08 08:29:36 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							56073a11ef 
							
						 
					 
					
						
						
							
							Don't use tags when calculating token vectors  
						
						 
						
						
						
					 
					
						2017-05-08 07:52:24 -05:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a66a4a4d0f 
							
						 
					 
					
						
						
							
							Replace einsums  
						
						 
						
						
						
					 
					
						2017-05-08 14:46:50 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							807cb2e370 
							
						 
					 
					
						
						
							
							Add PretrainableMaxouts  
						
						 
						
						
						
					 
					
						2017-05-08 14:24:43 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2e2268a442 
							
						 
					 
					
						
						
							
							Precomputable hidden now working  
						
						 
						
						
						
					 
					
						2017-05-08 11:36:37 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							10682d35ab 
							
						 
					 
					
						
						
							
							Get pre-computed version working  
						
						 
						
						
						
					 
					
						2017-05-08 00:38:35 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							12039e80ca 
							
						 
					 
					
						
						
							
							Switch to single matmul for state layer  
						
						 
						
						
						
					 
					
						2017-05-07 14:26:34 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f99f5b75dc 
							
						 
					 
					
						
						
							
							working residual net  
						
						 
						
						
						
					 
					
						2017-05-07 03:57:26 +02:00  
					
					
						 
						
							
							
							 
						
					 
				 
			
				
					
						
							
							
								 
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							bdf2dba9fb 
							
						 
					 
					
						
						
							
							WIP on refactor, with hidden pre-computing  
						
						 
						
						
						
					 
					
						2017-05-07 02:02:43 +02:00