Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							33e5ec737f 
							
						 
					 
					
						
						
							
							Fix to/from disk methods  
						
						
						
					 
					
						2017-05-31 13:43:10 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							53a3824334 
							
						 
					 
					
						
						
							
							Fix mistake in ner feature  
						
						
						
					 
					
						2017-05-31 03:01:02 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							cc911feab2 
							
						 
					 
					
						
						
							
							Fix bug in NER state  
						
						
						
					 
					
						2017-05-30 22:12:19 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							be4a640f0c 
							
						 
					 
					
						
						
							
							Fix arc eager label costs for uint64  
						
						
						
					 
					
						2017-05-30 20:37:58 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							aa4c33914b 
							
						 
					 
					
						
						
							
							Work on serialization  
						
						
						
					 
					
						2017-05-29 08:40:45 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							59f355d525 
							
						 
					 
					
						
						
							
							Fixes for serialization  
						
						
						
					 
					
						2017-05-29 13:38:20 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ff26aa6c37 
							
						 
					 
					
						
						
							
							Work on to/from bytes/disk serialization methods  
						
						
						
					 
					
						2017-05-29 11:45:45 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6b019b0540 
							
						 
					 
					
						
						
							
							Update to/from bytes methods  
						
						
						
					 
					
						2017-05-29 10:14:20 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9239f06ed3 
							
						 
					 
					
						
						
							
							Fix german noun chunks iterator  
						
						
						
					 
					
						2017-05-28 20:13:03 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							fd9b6722a9 
							
						 
					 
					
						
						
							
							Fix noun chunks iterator for new stringstore  
						
						
						
					 
					
						2017-05-28 20:12:10 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							7996d21717 
							
						 
					 
					
						
						
							
							Fixes for new StringStore  
						
						
						
					 
					
						2017-05-28 11:09:27 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8a24c60c1e 
							
						 
					 
					
						
						
							
							Merge branch 'develop' of  https://github.com/explosion/spaCy  into develop  
						
						
						
					 
					
						2017-05-28 08:12:05 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							bc97bc292c 
							
						 
					 
					
						
						
							
							Fix __call__ method  
						
						
						
					 
					
						2017-05-28 08:11:58 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							84e66ca6d4 
							
						 
					 
					
						
						
							
							WIP on stringstore change. 27 failures  
						
						
						
					 
					
						2017-05-28 14:06:40 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							39293ab2ee 
							
						 
					 
					
						
						
							
							Merge branch 'develop' of  https://github.com/explosion/spaCy  into develop  
						
						
						
					 
					
						2017-05-28 11:46:57 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							dd052572d4 
							
						 
					 
					
						
						
							
							Update arc eager for SBD changes  
						
						
						
					 
					
						2017-05-28 11:46:51 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c1263a844b 
							
						 
					 
					
						
						
							
							Merge branch 'develop' of  https://github.com/explosion/spaCy  into develop  
						
						
						
					 
					
						2017-05-27 18:32:57 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9e711c3476 
							
						 
					 
					
						
						
							
							Divide d_loss by batch size  
						
						
						
					 
					
						2017-05-27 18:32:46 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a1d4c97fb7 
							
						 
					 
					
						
						
							
							Improve correctness of minibatching  
						
						
						
					 
					
						2017-05-27 17:59:00 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							49235017bf 
							
						 
					 
					
						
						
							
							Merge branch 'develop' of  https://github.com/explosion/spaCy  into develop  
						
						
						
					 
					
						2017-05-27 16:34:28 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							7ebd26b8aa 
							
						 
					 
					
						
						
							
							Use ordered dict to specify transitions  
						
						
						
					 
					
						2017-05-27 15:52:20 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3eea5383a1 
							
						 
					 
					
						
						
							
							Add move_names property to parser  
						
						
						
					 
					
						2017-05-27 15:51:55 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							99316fa631 
							
						 
					 
					
						
						
							
							Use ordered dict to specify actions  
						
						
						
					 
					
						2017-05-27 15:50:21 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							655ca58c16 
							
						 
					 
					
						
						
							
							Clarifying change to StateC.clone  
						
						
						
					 
					
						2017-05-27 15:49:37 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3d22fcaf0b 
							
						 
					 
					
						
						
							
							Return None from parser if there are no annotations  
						
						
						
					 
					
						2017-05-26 14:02:59 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3d5a536eaa 
							
						 
					 
					
						
						
							
							Improve efficiency of parser batching  
						
						
						
					 
					
						2017-05-26 11:31:23 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2cb7cc2db7 
							
						 
					 
					
						
						
							
							Remove commented code from parser  
						
						
						
					 
					
						2017-05-25 14:55:09 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c245ff6b27 
							
						 
					 
					
						
						
							
							Rebatch parser inputs, with mid-sentence states  
						
						
						
					 
					
						2017-05-25 11:18:59 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							679efe79c8 
							
						 
					 
					
						
						
							
							Make parser update less hacky  
						
						
						
					 
					
						2017-05-25 06:49:00 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e1cb5be0c7 
							
						 
					 
					
						
						
							
							Adjust dropout, depth and multi-task in parser  
						
						
						
					 
					
						2017-05-24 20:11:41 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							620df0414f 
							
						 
					 
					
						
						
							
							Fix dropout in parser  
						
						
						
					 
					
						2017-05-23 15:20:45 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8026c183d0 
							
						 
					 
					
						
						
							
							Add hacky logic to accelerate depth=0 case in parser  
						
						
						
					 
					
						2017-05-23 11:06:49 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a8b6d11c5b 
							
						 
					 
					
						
						
							
							Support optional maxout layer  
						
						
						
					 
					
						2017-05-23 05:58:07 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c55b8fa7c5 
							
						 
					 
					
						
						
							
							Fix bugs in parse_batch  
						
						
						
					 
					
						2017-05-23 05:57:52 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							964707d795 
							
						 
					 
					
						
						
							
							Restore support for deeper networks in parser  
						
						
						
					 
					
						2017-05-23 05:31:13 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6b918cc58e 
							
						 
					 
					
						
						
							
							Support making updates periodically during training  
						
						
						
					 
					
						2017-05-23 04:23:29 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3f725ff7b3 
							
						 
					 
					
						
						
							
							Roll back changes to parser update  
						
						
						
					 
					
						2017-05-23 04:23:05 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3959d778ac 
							
						 
					 
					
						
						
							
							Revert "Revert "WIP on improving parser efficiency""  
						
						... 
						
						
						
						This reverts commit 532afef4a8 
						
					 
					
						2017-05-23 03:06:53 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							532afef4a8 
							
						 
					 
					
						
						
							
							Revert "WIP on improving parser efficiency"  
						
						... 
						
						
						
						This reverts commit bdaac7ab44 
						
					 
					
						2017-05-23 03:05:25 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							bdaac7ab44 
							
						 
					 
					
						
						
							
							WIP on improving parser efficiency  
						
						
						
					 
					
						2017-05-23 02:59:31 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8a9e318deb 
							
						 
					 
					
						
						
							
							Put the parsing loop in a nogil prange block  
						
						
						
					 
					
						2017-05-22 17:58:12 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e2136232f9 
							
						 
					 
					
						
						
							
							Exclude states with no matching gold annotations from parsing  
						
						
						
					 
					
						2017-05-22 10:30:12 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f00f821496 
							
						 
					 
					
						
						
							
							Fix pseudoprojectivity->nonproj  
						
						
						
					 
					
						2017-05-22 06:14:42 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5d59e74cf6 
							
						 
					 
					
						
						
							
							PseudoProjectivity->nonproj  
						
						
						
					 
					
						2017-05-22 05:49:53 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b45b4aa392 
							
						 
					 
					
						
						
							
							PseudoProjectivity --> nonproj  
						
						
						
					 
					
						2017-05-22 05:17:44 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							aae97f00e9 
							
						 
					 
					
						
						
							
							Fix nonproj import  
						
						
						
					 
					
						2017-05-22 05:15:06 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2a5eb9f61e 
							
						 
					 
					
						
						
							
							Make nonproj methods top-level functions, instead of class methods  
						
						
						
					 
					
						2017-05-22 04:51:08 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							33e2222839 
							
						 
					 
					
						
						
							
							Remove unused code in deprojectivize  
						
						
						
					 
					
						2017-05-22 04:51:08 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							025d9bbc37 
							
						 
					 
					
						
						
							
							Fix handling of non-projective deps  
						
						
						
					 
					
						2017-05-22 04:51:08 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							1b5fa68996 
							
						 
					 
					
						
						
							
							Do pseudo-projective pre-processing for parser  
						
						
						
					 
					
						2017-05-22 04:51:08 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							1d5d9838a2 
							
						 
					 
					
						
						
							
							Fix action collection for parser  
						
						
						
					 
					
						2017-05-22 04:51:08 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3b7c108246 
							
						 
					 
					
						
						
							
							Pass tokvecs through as a list, instead of concatenated. Also fix padding  
						
						
						
					 
					
						2017-05-20 13:23:32 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							d52b65aec2 
							
						 
					 
					
						
						
							
							Revert "Move to contiguous buffer for token_ids and d_vectors"  
						
						... 
						
						
						
						This reverts commit 3ff8c35a79 
						
					 
					
						2017-05-20 11:26:23 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b272890a8c 
							
						 
					 
					
						
						
							
							Try to move parser to simpler PrecomputedAffine class. Currently broken -- maybe the previous change  
						
						
						
					 
					
						2017-05-20 06:40:10 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3ff8c35a79 
							
						 
					 
					
						
						
							
							Move to contiguous buffer for token_ids and d_vectors  
						
						
						
					 
					
						2017-05-20 04:17:30 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8b04b0af9f 
							
						 
					 
					
						
						
							
							Remove freqs from transition_system  
						
						
						
					 
					
						2017-05-20 02:20:48 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a1ba20e2b1 
							
						 
					 
					
						
						
							
							Fix over-run on parse_batch  
						
						
						
					 
					
						2017-05-19 18:57:30 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e84de028b5 
							
						 
					 
					
						
						
							
							Remove 'rebatch' op, and remove min-batch cap  
						
						
						
					 
					
						2017-05-19 18:16:36 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c12ab47a56 
							
						 
					 
					
						
						
							
							Remove state argument in pipeline. Other changes  
						
						
						
					 
					
						2017-05-19 13:26:36 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c2c825127a 
							
						 
					 
					
						
						
							
							Fix use_params and pipe methods  
						
						
						
					 
					
						2017-05-18 08:30:59 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							fc8d3a112c 
							
						 
					 
					
						
						
							
							Add util.env_opt support: Can set hyper params through environment variables.  
						
						
						
					 
					
						2017-05-18 04:36:53 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							d2626fdb45 
							
						 
					 
					
						
						
							
							Fix name error in nn parser  
						
						
						
					 
					
						2017-05-18 04:31:01 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							793430aa7a 
							
						 
					 
					
						
						
							
							Get spaCy train command working with neural network  
						
						... 
						
						
						
						* Integrate models into pipeline
* Add basic serialization (maybe incorrect)
* Fix pickle on vocab 
						
					 
					
						2017-05-17 12:04:50 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8cf097ca88 
							
						 
					 
					
						
						
							
							Redesign training to integrate NN components  
						
						... 
						
						
						
						* Obsolete .parser, .entity etc names in favour of .pipeline
* Components no longer create models on initialization
* Models created by loading method (from_disk(), from_bytes() etc), or
    .begin_training()
* Add .predict(), .set_annotations() methods in components
* Pass state through pipeline, to allow components to share information
    more flexibly. 
						
					 
					
						2017-05-16 16:17:30 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5211645af3 
							
						 
					 
					
						
						
							
							Get data flowing through pipeline. Needs redesign  
						
						
						
					 
					
						2017-05-16 11:21:59 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a9edb3aa1d 
							
						 
					 
					
						
						
							
							Improve integration of NN parser, to support unified training API  
						
						
						
					 
					
						2017-05-15 21:53:27 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4b9d69f428 
							
						 
					 
					
						
						
							
							Merge branch 'v2' into develop  
						
						... 
						
						
						
						* Move v2 parser into nn_parser.pyx
* New TokenVectorEncoder class in pipeline.pyx
* New spacy/_ml.py module
Currently the two parsers live side-by-side, until we figure out how to
organize them. 
						
					 
					
						2017-05-14 01:10:23 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5cac951a16 
							
						 
					 
					
						
						
							
							Move new parser to nn_parser.pyx, and restore old parser, to make tests pass.  
						
						
						
					 
					
						2017-05-14 00:55:01 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f8c02b4341 
							
						 
					 
					
						
						
							
							Remove cupy imports from parser, so it can work on CPU  
						
						
						
					 
					
						2017-05-14 00:37:53 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e6d71e1778 
							
						 
					 
					
						
						
							
							Small fixes to parser  
						
						
						
					 
					
						2017-05-13 17:19:04 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							188c0f6949 
							
						 
					 
					
						
						
							
							Clean up unused import  
						
						
						
					 
					
						2017-05-13 17:18:27 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f85c8464f7 
							
						 
					 
					
						
						
							
							Draft support of regression loss in parser  
						
						
						
					 
					
						2017-05-13 17:17:27 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							827b5af697 
							
						 
					 
					
						
						
							
							Update draft of parser neural network model  
						
						... 
						
						
						
						Model is good, but code is messy. Currently requires Chainer, which may cause the build to fail on machines without a GPU.
Outline of the model:
We first predict context-sensitive vectors for each word in the input:
(embed_lower | embed_prefix | embed_suffix | embed_shape)
>> Maxout(token_width)
>> convolution ** 4
This convolutional layer is shared between the tagger and the parser. This prevents the parser from needing tag features.
To boost the representation, we make a "super tag" with POS, morphology and dependency label. The tagger predicts this
by adding a softmax layer onto the convolutional layer --- so, we're teaching the convolutional layer to give us a
representation that's one affine transform from this informative lexical information. This is obviously good for the
parser (which backprops to the convolutions too).
The parser model makes a state vector by concatenating the vector representations for its context tokens. Current
results suggest that using only a few context tokens works well. Maybe this is a bug.
The current context tokens:
* S0, S1, S2: Top three words on the stack
* B0, B1: First two words of the buffer
* S0L1, S0L2: Leftmost and second leftmost children of S0
* S0R1, S0R2: Rightmost and second rightmost children of S0
* S1L1, S1L2, S1R1, S1R2, B0L1, B0L2: Likewise for S1 and B0
This makes the state vector quite long: 13*T, where T is the token vector width (128 is working well). Fortunately,
there's a way to structure the computation to save some expense (and make it more GPU friendly).
The parser typically visits 2*N states for a sentence of length N (although it may visit more, if it back-tracks
with a non-monotonic transition). A naive implementation would require 2*N (B, 13*T) @ (13*T, H) matrix multiplications
for a batch of size B. We can instead perform one (B*N, T) @ (T, 13*H) multiplication, to pre-compute the hidden
weights for each positional feature wrt the words in the batch. (Note that our token vectors come from the CNN
-- so we can't play this trick over the vocabulary. That's how Stanford's NN parser works --- and why its model
is so big.)
This pre-computation strategy allows a nice compromise between GPU-friendliness and implementation simplicity.
The CNN and the wide lower layer are computed on the GPU, and then the precomputed hidden weights are moved
to the CPU, before we start the transition-based parsing process. This makes a lot of things much easier.
We don't have to worry about variable-length batch sizes, and we don't have to implement the dynamic oracle
in CUDA to train.
Currently the parser's loss function is multilabel log loss, as the dynamic oracle allows multiple states to
be 0 cost. This is defined as:
(exp(score) / Z) - (exp(score) / gZ)
Where Z is the sum of exp(score) over all classes, and gZ is the sum of exp(score) over the gold classes. I'm very interested in regressing on the cost directly,
but so far this isn't working well.
Machinery is in place for beam-search, which has been working well for the linear model. Beam search should benefit
greatly from the pre-computation trick. 
						
					 
					
						2017-05-12 16:09:15 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b44f7e259c 
							
						 
					 
					
						
						
							
							Clean up unused parser code  
						
						
						
					 
					
						2017-05-08 15:42:04 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							17efb1c001 
							
						 
					 
					
						
						
							
							Change width  
						
						
						
					 
					
						2017-05-08 08:40:13 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							bef89ef23d 
							
						 
					 
					
						
						
							
							Mergery  
						
						
						
					 
					
						2017-05-08 08:29:36 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							50ddc9fc45 
							
						 
					 
					
						
						
							
							Fix infinite loop bug  
						
						
						
					 
					
						2017-05-08 07:54:26 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a66a4a4d0f 
							
						 
					 
					
						
						
							
							Replace einsums  
						
						
						
					 
					
						2017-05-08 14:46:50 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8d2eab74da 
							
						 
					 
					
						
						
							
							Use PretrainableMaxouts  
						
						
						
					 
					
						2017-05-08 14:24:55 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2e2268a442 
							
						 
					 
					
						
						
							
							Precomputable hidden now working  
						
						
						
					 
					
						2017-05-08 11:36:37 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							10682d35ab 
							
						 
					 
					
						
						
							
							Get pre-computed version working  
						
						
						
					 
					
						2017-05-08 00:38:35 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							35458987e8 
							
						 
					 
					
						
						
							
							Checkpoint -- nearly finished reimpl  
						
						
						
					 
					
						2017-05-07 23:05:01 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4441866f55 
							
						 
					 
					
						
						
							
							Checkpoint -- nearly finished reimpl  
						
						
						
					 
					
						2017-05-07 22:47:06 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6782eedf9b 
							
						 
					 
					
						
						
							
							Tmp GPU code  
						
						
						
					 
					
						2017-05-07 11:04:24 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e420e5a809 
							
						 
					 
					
						
						
							
							Tmp  
						
						
						
					 
					
						2017-05-07 07:31:09 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							700979fb3c 
							
						 
					 
					
						
						
							
							CPU/GPU compat  
						
						
						
					 
					
						2017-05-07 04:01:11 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f99f5b75dc 
							
						 
					 
					
						
						
							
							working residual net  
						
						
						
					 
					
						2017-05-07 03:57:26 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							bdf2dba9fb 
							
						 
					 
					
						
						
							
							WIP on refactor, with hidden pre-computing  
						
						
						
					 
					
						2017-05-07 02:02:43 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b439e04f8d 
							
						 
					 
					
						
						
							
							Learning smoothly  
						
						
						
					 
					
						2017-05-06 20:38:12 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							08bee76790 
							
						 
					 
					
						
						
							
							Learns things  
						
						
						
					 
					
						2017-05-06 18:24:38 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							bcf4cd0a5f 
							
						 
					 
					
						
						
							
							Learns things  
						
						
						
					 
					
						2017-05-06 17:37:36 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8e48b58cd6 
							
						 
					 
					
						
						
							
							Gradients look correct  
						
						
						
					 
					
						2017-05-06 16:47:15 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							7e04260d38 
							
						 
					 
					
						
						
							
							Data running through, likely errors in model  
						
						
						
					 
					
						2017-05-06 14:22:20 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ef4fa594aa 
							
						 
					 
					
						
						
							
							Draft of NN parser, to be tested  
						
						
						
					 
					
						2017-05-05 19:20:39 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ccaf26206b 
							
						 
					 
					
						
						
							
							Pseudocode for parser  
						
						
						
					 
					
						2017-05-04 12:17:59 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2da16adcc2 
							
						 
					 
					
						
						
							
							Add dropout option for parser and NER  
						
						... 
						
						
						
						Dropout can now be specified in the `Parser.update()` method via
the `drop` keyword argument, e.g.
    nlp.entity.update(doc, gold, drop=0.4)
This will randomly drop 40% of features, and multiply the value of the
others by 1. / 0.4. This may be useful for generalising from small data
sets.
This commit also patches the examples/training/train_new_entity_type.py
example, to use dropout and fix the output (previously it did not output
the learned entity). 
						
					 
					
						2017-04-27 13:18:39 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							d2436dc17b 
							
						 
					 
					
						
						
							
							Update fix for Issue  #999  
						
						
						
					 
					
						2017-04-23 18:14:37 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							60703cede5 
							
						 
					 
					
						
						
							
							Ensure noun chunks can't be nested.  Closes   #955  
						
						
						
					 
					
						2017-04-23 17:56:39 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4eef200bab 
							
						 
					 
					
						
						
							
							Persist the actions within spacy.parser.cfg  
						
						
						
					 
					
						2017-04-20 17:02:44 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							137b210bcf 
							
						 
					 
					
						
						
							
							Restore use of FTRL training  
						
						
						
					 
					
						2017-04-16 18:02:42 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							45464d065e 
							
						 
					 
					
						
						
							
							Remove print statement  
						
						
						
					 
					
						2017-04-15 16:11:43 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c76cb8af35 
							
						 
					 
					
						
						
							
							Fix training for new labels  
						
						
						
					 
					
						2017-04-15 16:11:26 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4884b2c113 
							
						 
					 
					
						
						
							
							Refix StepwiseState  
						
						
						
					 
					
						2017-04-15 16:00:28 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							1a98e48b8e 
							
						 
					 
					
						
						
							
							Fix StepwiseState  
						
						
						
					 
					
						2017-04-15 13:35:01 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							0739ae7b76 
							
						 
					 
					
						
						
							
							Tidy up and fix formatting and imports  
						
						
						
					 
					
						2017-04-15 13:05:15 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							354458484c 
							
						 
					 
					
						
						
							
							WIP on add_label bug during NER training  
						
						... 
						
						
						
						Currently when a new label is introduced to NER during training,
it causes the labels to be read in in an unexpected order. This
invalidates the model. 
						
					 
					
						2017-04-14 23:52:17 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							49e2de900e 
							
						 
					 
					
						
						
							
							Add costs property to StepwiseState, to show which moves are gold.  
						
						
						
					 
					
						2017-04-10 11:37:04 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							cc36c308f4 
							
						 
					 
					
						
						
							
							Fix noun_chunk rules around coordination  
						
						... 
						
						
						
						Closes  #693 . 
					
						2017-04-07 17:06:40 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							1bb7b4ca71 
							
						 
					 
					
						
						
							
							Add comment  
						
						
						
					 
					
						2017-03-31 13:59:19 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							47a3ef06a6 
							
						 
					 
					
						
						
							
							Unhack deprojectivization, moving it into pipeline  
						
						... 
						
						
						
						Previously the deprojectivize() call was attached to the transition
system, and only called for German. Instead it should be a separate
process, called after the parser. This makes it available for any
language. Closes  #898 . 
						
					 
					
						2017-03-31 12:31:50 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a9b1f23c7d 
							
						 
					 
					
						
						
							
							Enable regression loss for parser  
						
						
						
					 
					
						2017-03-26 09:26:30 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b487b8735a 
							
						 
					 
					
						
						
							
							Decrease beam density, and fix Python 3 problem in beam  
						
						
						
					 
					
						2017-03-20 12:56:05 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c90dc7ac29 
							
						 
					 
					
						
						
							
							Clean up state initialisation in transition system  
						
						
						
					 
					
						2017-03-16 11:59:11 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a46933a8fe 
							
						 
					 
					
						
						
							
							Clean up FTRL parsing stuff.  
						
						
						
					 
					
						2017-03-16 11:58:20 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2611ac2a89 
							
						 
					 
					
						
						
							
							Fix scorer bug for NER, related to ambiguity between missing annotations and misaligned tokens  
						
						
						
					 
					
						2017-03-16 09:38:28 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3d0833c3df 
							
						 
					 
					
						
						
							
							Fix off-by-1 in parse features fill_context  
						
						
						
					 
					
						2017-03-15 19:55:35 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4ef68c413f 
							
						 
					 
					
						
						
							
							Approximate cost in Break transition, to speed things up a bit.  
						
						
						
					 
					
						2017-03-15 16:40:27 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8543db8a5b 
							
						 
					 
					
						
						
							
							Use ftrl optimizer in parser  
						
						
						
					 
					
						2017-03-15 11:56:37 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							d719f8e77e 
							
						 
					 
					
						
						
							
							Use nogil in parser, and set L1 to 0.0 by default  
						
						
						
					 
					
						2017-03-15 09:31:01 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c61c501406 
							
						 
					 
					
						
						
							
							Update beam-parser to allow parser to maintain nogil  
						
						
						
					 
					
						2017-03-15 09:30:22 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c79b3129e3 
							
						 
					 
					
						
						
							
							Fix setting of empty lexeme in initial parse state  
						
						
						
					 
					
						2017-03-15 09:26:53 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6c4108c073 
							
						 
					 
					
						
						
							
							Add header for beam parser  
						
						
						
					 
					
						2017-03-11 12:45:12 -06:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							931feb3360 
							
						 
					 
					
						
						
							
							Allow beam parsing for NER  
						
						
						
					 
					
						2017-03-11 11:12:01 -06:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ca9c8c57c0 
							
						 
					 
					
						
						
							
							Add iteration argument to parser.update  
						
						
						
					 
					
						2017-03-11 07:00:47 -06:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							d59c6926c1 
							
						 
					 
					
						
						
							
							I think this fixes the segfault  
						
						
						
					 
					
						2017-03-11 06:58:34 -06:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							318b9e32ff 
							
						 
					 
					
						
						
							
							WIP on beam parser. Currently segfaults.  
						
						
						
					 
					
						2017-03-11 06:19:52 -06:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b0d80dc9ae 
							
						 
					 
					
						
						
							
							Update name of 'train' function in BeamParser  
						
						
						
					 
					
						2017-03-10 14:35:43 -06:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							d11f1a4ddf 
							
						 
					 
					
						
						
							
							Record negative costs in non-monotonic arc eager oracle  
						
						
						
					 
					
						2017-03-10 11:22:04 -06:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ecf91a2dbb 
							
						 
					 
					
						
						
							
							Support beam parser  
						
						
						
					 
					
						2017-03-10 11:21:21 -06:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c62da02344 
							
						 
					 
					
						
						
							
							Use ftrl training, to learn compressed model.  
						
						
						
					 
					
						2017-03-09 18:43:21 -06:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							40703988bc 
							
						 
					 
					
						
						
							
							Use FTRL training in parser  
						
						
						
					 
					
						2017-03-08 01:38:51 +01:00 
						 
				 
			
				
					
						
							
							
								Roman Inflianskas 
							
						 
					 
					
						
						
						
						
							
						
						
							66e1109b53 
							
						 
					 
					
						
						
							
							Add support for Universal Dependencies v2.0  
						
						
						
					 
					
						2017-03-03 13:17:34 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							97a1286129 
							
						 
					 
					
						
						
							
							Revert changes to tagger and parser for thinc 6  
						
						
						
					 
					
						2017-01-09 10:08:34 -06:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							af81ac8bb0 
							
						 
					 
					
						
						
							
							Use thinc 6.0  
						
						
						
					 
					
						2016-12-29 11:58:42 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							bc0a202c9c 
							
						 
					 
					
						
						
							
							Fix unicode problem in nonproj module  
						
						
						
					 
					
						2016-11-25 17:29:17 -06:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							159e8c46e1 
							
						 
					 
					
						
						
							
							Merge old training fixes with newer state  
						
						
						
					 
					
						2016-11-25 09:16:36 -06:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							39341598bb 
							
						 
					 
					
						
						
							
							Fix NER label calculation  
						
						
						
					 
					
						2016-11-25 09:02:22 -06:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ca773a1f53 
							
						 
					 
					
						
						
							
							Tweak arc_eager n_gold to deal with negative costs, and improve error message.  
						
						
						
					 
					
						2016-11-25 09:01:52 -06:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							608d8f5421 
							
						 
					 
					
						
						
							
							Pass cfg through parser, and have is_valid default to 1, not 0 when resetting state  
						
						
						
					 
					
						2016-11-25 09:00:21 -06:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b8c4f5ea76 
							
						 
					 
					
						
						
							
							Allow German noun chunks to work on Span  
						
						... 
						
						
						
						Update the German noun chunks iterator, so that it also works on Span objects. 
						
					 
					
						2016-11-24 23:30:15 +11:00 
						 
				 
			
				
					
						
							
							
								Pokey Rule 
							
						 
					 
					
						
						
						
						
							
						
						
							3e3bda142d 
							
						 
					 
					
						
						
							
							Add noun_chunks to Span  
						
						
						
					 
					
						2016-11-24 10:47:20 +00:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b86f8af0c1 
							
						 
					 
					
						
						
							
							Fix doc strings  
						
						
						
					 
					
						2016-11-01 12:25:36 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							708ea22208 
							
						 
					 
					
						
						
							
							Infer types in transition_system.pyx  
						
						
						
					 
					
						2016-10-27 18:08:13 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							301f3cc898 
							
						 
					 
					
						
						
							
							Fix Issue  #429 . Add an initialize_state method to the named entity recogniser that adds missing entity types. This is a messy place to add this, because it's strange to have the method mutate state. A better home for this logic could be found.  
						
						
						
					 
					
						2016-10-27 18:01:55 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							03a520ec4f 
							
						 
					 
					
						
						
							
							Change signature of Parser.parseC, so that nr_class is read from the transition system. This allows the transition system to modify the number of actions in initialize_state.  
						
						
						
					 
					
						2016-10-27 17:58:56 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a209b10579 
							
						 
					 
					
						
						
							
							Improve error message when oracle fails for non-projective trees, re Issue  #571 .  
						
						
						
					 
					
						2016-10-24 20:31:30 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3e688e6d4b 
							
						 
					 
					
						
						
							
							Fix issue  #514  -- serializer fails when new entity type has been added. The fix here is quite ugly. It's best to add the entities ASAP after loading the NLP pipeline, to mitigate the brittleness.  
						
						
						
					 
					
						2016-10-23 17:45:44 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							59038f7efa 
							
						 
					 
					
						
						
							
							Restore support for prior data format -- specifically, the labels field of the config.  
						
						
						
					 
					
						2016-10-17 00:53:26 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							7887ab3b36 
							
						 
					 
					
						
						
							
							Fix default use of feature_templates in parser  
						
						
						
					 
					
						2016-10-16 21:41:56 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f787cd29fe 
							
						 
					 
					
						
						
							
							Refactor the pipeline classes to make them more consistent, and remove the redundant blank() constructor.  
						
						
						
					 
					
						2016-10-16 21:34:57 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							274a4d4272 
							
						 
					 
					
						
						
							
							Fix queue Python property in StateClass  
						
						
						
					 
					
						2016-10-16 17:04:41 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e8c8aa08ce 
							
						 
					 
					
						
						
							
							Make action_name optional in StepwiseState  
						
						
						
					 
					
						2016-10-16 17:04:16 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4fc56d4a31 
							
						 
					 
					
						
						
							
							Rename 'labels' to 'actions' in parser options  
						
						
						
					 
					
						2016-10-16 11:42:26 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3259a63779 
							
						 
					 
					
						
						
							
							Whitespace  
						
						
						
					 
					
						2016-10-16 01:47:28 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							d9ae2d68af 
							
						 
					 
					
						
						
							
							Load features by string-name for backwards compatibility.  
						
						
						
					 
					
						2016-10-12 20:15:11 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3a03c668c3 
							
						 
					 
					
						
						
							
							Fix message in ParserStateError  
						
						
						
					 
					
						2016-10-12 14:44:31 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6bf505e865 
							
						 
					 
					
						
						
							
							Fix error on ParserStateError  
						
						
						
					 
					
						2016-10-12 14:35:55 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ea23b64cc8 
							
						 
					 
					
						
						
							
							Refactor training, with new spacy.train module. Defaults still a little awkward.  
						
						
						
					 
					
						2016-10-09 12:24:24 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							1d70db58aa 
							
						 
					 
					
						
						
							
							Revert "Changes to iterators.pyx for new StringStore scheme"  
						
						... 
						
						
						
						This reverts commit 4f794b215a 
						
					 
					
						2016-09-30 20:19:53 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9e09b39b9f 
							
						 
					 
					
						
						
							
							Revert "Changes to transition systems for new StringStore scheme"  
						
						... 
						
						
						
						This reverts commit 0442e0ab1e 
						
					 
					
						2016-09-30 20:11:49 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e3285f6f30 
							
						 
					 
					
						
						
							
							Revert "Fix report of ParserStateError"  
						
						... 
						
						
						
						This reverts commit 78f19baafa 
						
					 
					
						2016-09-30 20:11:33 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							78f19baafa 
							
						 
					 
					
						
						
							
							Fix report of ParserStateError  
						
						
						
					 
					
						2016-09-30 19:59:22 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							0442e0ab1e 
							
						 
					 
					
						
						
							
							Changes to transition systems for new StringStore scheme  
						
						
						
					 
					
						2016-09-30 19:58:51 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4f794b215a 
							
						 
					 
					
						
						
							
							Changes to iterators.pyx for new StringStore scheme  
						
						
						
					 
					
						2016-09-30 19:57:49 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4cbf0d3bb6 
							
						 
					 
					
						
						
							
							Handle errors when no valid actions are available, pointing users to the issue tracker.  
						
						
						
					 
					
						2016-09-27 19:19:53 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							430473bd98 
							
						 
					 
					
						
						
							
							Raise errors when no actions are available, re Issue  #429  
						
						
						
					 
					
						2016-09-27 19:09:37 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8e7df3c4ca 
							
						 
					 
					
						
						
							
							Expect the parser data, if parser.load() is called.  
						
						
						
					 
					
						2016-09-27 14:02:12 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a44763af0e 
							
						 
					 
					
						
						
							
							Fix Issue  #469 : Incorrectly cased root label in noun chunk iterator  
						
						
						
					 
					
						2016-09-27 13:13:01 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e07b9665f7 
							
						 
					 
					
						
						
							
							Don't expect parser model  
						
						
						
					 
					
						2016-09-26 18:09:33 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ee6fa106da 
							
						 
					 
					
						
						
							
							Fix parser features  
						
						
						
					 
					
						2016-09-26 17:57:32 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e607e4b598 
							
						 
					 
					
						
						
							
							Fix parser loading  
						
						
						
					 
					
						2016-09-26 17:51:11 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2debc4e0a2 
							
						 
					 
					
						
						
							
							Add .blank() method to Parser. Start housing default dep labels and entity types within the Defaults class.  
						
						
						
					 
					
						2016-09-26 11:57:54 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							fd65cf6cbb 
							
						 
					 
					
						
						
							
							Finish refactoring data loading  
						
						
						
					 
					
						2016-09-24 20:26:17 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							83e364188c 
							
						 
					 
					
						
						
							
							Mostly finished loading refactoring. Design is in place, but doesn't work yet.  
						
						
						
					 
					
						2016-09-24 15:42:01 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							60fdf4d5f1 
							
						 
					 
					
						
						
							
							Remove commented out debugging code  
						
						
						
					 
					
						2016-09-24 01:17:18 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							070af4af9d 
							
						 
					 
					
						
						
							
							Revert "* Working neural net, but features hacky. Switching to extractor."  
						
						... 
						
						
						
						This reverts commit 7c2f1a673b 
						
					 
					
						2016-09-21 12:26:14 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							7c2f1a673b 
							
						 
					 
					
						
						
							
							* Working neural net, but features hacky. Switching to extractor.  
						
						
						
					 
					
						2016-05-26 19:06:10 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							13fad36e49 
							
						 
					 
					
						
						
							
							* Cosmetic change to english noun chunks iterator -- use enumerate instead of range loop  
						
						
						
					 
					
						2016-05-20 10:11:05 +02:00 
						 
				 
			
				
					
						
							
							
								Wolfgang Seeker 
							
						 
					 
					
						
						
						
						
							
						
						
							7b78239436 
							
						 
					 
					
						
						
							
							add fix for German noun chunk iterator (issue  #365 )  
						
						
						
					 
					
						2016-05-06 01:41:26 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							bb94022975 
							
						 
					 
					
						
						
							
							* Fix Issue  #365 : Error introduced during noun phrase chunking, due to use of corrected PRON/PROPN/etc tags.  
						
						
						
					 
					
						2016-05-06 00:21:05 +02:00 
						 
				 
			
				
					
						
							
							
								Wolfgang Seeker 
							
						 
					 
					
						
						
						
						
							
						
						
							dbf8f5f3ec 
							
						 
					 
					
						
						
							
							fix bug in StateC.set_break()  
						
						
						
					 
					
						2016-05-05 15:15:34 +02:00 
						 
				 
			
				
					
						
							
							
								Wolfgang Seeker 
							
						 
					 
					
						
						
						
						
							
						
						
							3c44b5dc1a 
							
						 
					 
					
						
						
							
							call deprojectivization after parsing  
						
						
						
					 
					
						2016-05-05 15:10:36 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							472f576b82 
							
						 
					 
					
						
						
							
							* Deprojectivize German parses  
						
						
						
					 
					
						2016-05-05 15:01:10 +02:00 
						 
				 
			
				
					
						
							
							
								Wolfgang Seeker 
							
						 
					 
					
						
						
						
						
							
						
						
							e4ea2bea01 
							
						 
					 
					
						
						
							
							fix whitespace  
						
						
						
					 
					
						2016-05-04 07:40:38 +02:00 
						 
				 
			
				
					
						
							
							
								Wolfgang Seeker 
							
						 
					 
					
						
						
						
						
							
						
						
							5bf2fd1f78 
							
						 
					 
					
						
						
							
							make the code less cryptic  
						
						
						
					 
					
						2016-05-03 17:19:05 +02:00 
						 
				 
			
				
					
						
							
							
								Wolfgang Seeker 
							
						 
					 
					
						
						
						
						
							
						
						
							a06fca9fdf 
							
						 
					 
					
						
						
							
							German noun chunk iterator now doesn't return tokens more than once  
						
						
						
					 
					
						2016-05-03 16:58:59 +02:00 
						 
				 
			
				
					
						
							
							
								Wolfgang Seeker 
							
						 
					 
					
						
						
						
						
							
						
						
							7b246c13cb 
							
						 
					 
					
						
						
							
							reformulate noun chunk tests for English  
						
						
						
					 
					
						2016-05-03 14:24:35 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							1f1532142f 
							
						 
					 
					
						
						
							
							* Fix cost calculation on non-monotonic oracle  
						
						
						
					 
					
						2016-05-03 00:21:08 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							508fd1f6dc 
							
						 
					 
					
						
						
							
							* Refactor noun chunk iterators, so that they're simple functions. Install the iterator when the Doc is created, but allow users to write to the noun_chunk_iterator attribute. The iterator functions accept an object and yield (int start, int end, int label) triples.  
						
						
						
					 
					
						2016-05-02 14:25:10 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							77609588b6 
							
						 
					 
					
						
						
							
							* Fix assignment of root label to words left as root implicitly, after parsing ends.  
						
						
						
					 
					
						2016-04-25 19:41:59 +00:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							7c2d2deaa7 
							
						 
					 
					
						
						
							
							* Revise transition system so that the Break transition retains sole responsibility for setting sentence boundaries. Re Issue  #322  
						
						
						
					 
					
						2016-04-25 19:41:59 +00:00 
						 
				 
			
				
					
						
							
							
								Wolfgang Seeker 
							
						 
					 
					
						
						
						
						
							
						
						
							12024b0b0a 
							
						 
					 
					
						
						
							
							bugfix: introducing multiple roots now updates original head's properties  
						
						... 
						
						
						
						adjust tests to rely less on statistical model 
						
					 
					
						2016-04-20 16:42:41 +02:00 
						 
				 
			
				
					
						
							
							
								Wolfgang Seeker 
							
						 
					 
					
						
						
						
						
							
						
						
							b98cc3266d 
							
						 
					 
					
						
						
							
							bugfix: iterators now reset properly when called a second time  
						
						
						
					 
					
						2016-04-15 17:49:16 +02:00 
						 
				 
			
				
					
						
							
							
								Wolfgang Seeker 
							
						 
					 
					
						
						
						
						
							
						
						
							289b10f441 
							
						 
					 
					
						
						
							
							remove some comments  
						
						
						
					 
					
						2016-04-14 15:37:51 +02:00 
						 
				 
			
				
					
						
							
							
								Wolfgang Seeker 
							
						 
					 
					
						
						
						
						
							
						
						
							d99a9cbce9 
							
						 
					 
					
						
						
							
							different handling of space tokens  
						
						... 
						
						
						
						space tokens are now always attached to the previous non-space token
there are two exceptions:
leading space tokens are attached to the first following non-space token
in input that consists exclusively of space tokens, the last space token
is the head of all others. 
						
					 
					
						2016-04-13 15:28:28 +02:00 
						 
				 
			
				
					
						
							
							
								Wolfgang Seeker 
							
						 
					 
					
						
						
						
						
							
						
						
							d328e0b4a8 
							
						 
					 
					
						
						
							
							Merge branch 'master' into space_head_bug  
						
						
						
					 
					
						2016-04-11 12:11:01 +02:00 
						 
				 
			
				
					
						
							
							
								Wolfgang Seeker 
							
						 
					 
					
						
						
						
						
							
						
						
							80bea62842 
							
						 
					 
					
						
						
							
							bugfix in unit test  
						
						
						
					 
					
						2016-04-08 16:46:44 +02:00 
						 
				 
			
				
					
						
							
							
								Wolfgang Seeker 
							
						 
					 
					
						
						
						
						
							
						
						
							1fe911cdb0 
							
						 
					 
					
						
						
							
							bugfix  
						
						
						
					 
					
						2016-04-07 18:19:51 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							872695759d 
							
						 
					 
					
						
						
							
							Merge pull request  #306  from wbwseeker/german_noun_chunks  
						
						... 
						
						
						
						add German noun chunk functionality 
						
					 
					
						2016-04-08 00:54:24 +10:00 
						 
				 
			
				
					
						
							
							
								Wolfgang Seeker 
							
						 
					 
					
						
						
						
						
							
						
						
							7195b6742d 
							
						 
					 
					
						
						
							
							add restrictions to L-arc and R-arc to prevent space heads  
						
						
						
					 
					
						2016-03-28 10:40:52 +02:00 
						 
				 
			
				
					
						
							
							
								Wolfgang Seeker 
							
						 
					 
					
						
						
						
						
							
						
						
							5e2e8e951a 
							
						 
					 
					
						
						
							
							add baseclass DocIterator for iterators over documents  
						
						... 
						
						
						
						add classes for English and German noun chunks
the respective iterators are set for the document when created by the parser
as they depend on the annotation scheme of the parsing model 
						
					 
					
						2016-03-16 15:53:35 +01:00 
						 
				 
			
				
					
						
							
							
								Wolfgang Seeker 
							
						 
					 
					
						
						
						
						
							
						
						
							46e3f979f1 
							
						 
					 
					
						
						
							
							add function for setting head and label to token  
						
						... 
						
						
						
						change PseudoProjectivity.deprojectivize to use these functions 
						
					 
					
						2016-03-11 17:31:06 +01:00 
						 
				 
			
				
					
						
							
							
								Wolfgang Seeker 
							
						 
					 
					
						
						
						
						
							
						
						
							7adbd7a785 
							
						 
					 
					
						
						
							
							replace Counter with normal dict  
						
						
						
					 
					
						2016-03-03 21:36:27 +01:00 
						 
				 
			
				
					
						
							
							
								Wolfgang Seeker 
							
						 
					 
					
						
						
						
						
							
						
						
							1ae487a4f6 
							
						 
					 
					
						
						
							
							add backwards compatibility with python 2.6  
						
						
						
					 
					
						2016-03-03 21:18:12 +01:00 
						 
				 
			
				
					
						
							
							
								Wolfgang Seeker 
							
						 
					 
					
						
						
						
						
							
						
						
							72b8df0684 
							
						 
					 
					
						
						
							
							turned PseudoProjectivity into a normal python class  
						
						
						
					 
					
						2016-03-03 19:05:08 +01:00 
						 
				 
			
				
					
						
							
							
								Wolfgang Seeker 
							
						 
					 
					
						
						
						
						
							
						
						
							690c5acabf 
							
						 
					 
					
						
						
							
							adjust train.py to train both english and german models  
						
						
						
					 
					
						2016-03-03 15:21:00 +01:00 
						 
				 
			
				
					
						
							
							
								Wolfgang Seeker 
							
						 
					 
					
						
						
						
						
							
						
						
							3448cb40a4 
							
						 
					 
					
						
						
							
							integrated pseudo-projective parsing into parser  
						
						... 
						
						
						
						- nonproj.pyx holds a class PseudoProjectivity which currently holds
  all functionality to implement Nivre & Nilsson 2005's pseudo-projective
  parsing using the HEAD decoration scheme
- changed lefts/rights in Token to account for possible non-projective
  structures 
						
					 
					
						2016-03-01 10:09:08 +01:00 
						 
				 
			
				
					
						
							
							
								Wolfgang Seeker 
							
						 
					 
					
						
						
						
						
							
						
						
							56b7210e82 
							
						 
					 
					
						
						
							
							moved nonproj.py to syntax/nonproj.pyx  
						
						
						
					 
					
						2016-02-25 15:08:49 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							1b83cb9dfa 
							
						 
					 
					
						
						
							
							* Fix Issue  #251 : Incorrect right edge calculation on left-clobber low in the tree  
						
						
						
					 
					
						2016-02-07 00:00:42 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4412a70dc5 
							
						 
					 
					
						
						
							
							* Initialize StateC._empty_token to 0, to avoid undefined behaviour.  
						
						
						
					 
					
						2016-02-06 13:34:38 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							1b41f868d2 
							
						 
					 
					
						
						
							
							* Check for errors in parser, and parallelise the left-over batch  
						
						
						
					 
					
						2016-02-06 10:06:30 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							165ca28b80 
							
						 
					 
					
						
						
							
							* Set is_parsed flag in Parser.pipe  
						
						
						
					 
					
						2016-02-05 19:51:44 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							bdd579db0a 
							
						 
					 
					
						
						
							
							* Set is_parsed flag in Parser.pipe  
						
						
						
					 
					
						2016-02-05 19:50:11 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b04c9aad71 
							
						 
					 
					
						
						
							
							* Fix off-by-one in Parser.pipe  
						
						
						
					 
					
						2016-02-05 19:37:50 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							048dfe35aa 
							
						 
					 
					
						
						
							
							* cimport cython.parallel  
						
						
						
					 
					
						2016-02-05 12:20:42 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8a13cebdcc 
							
						 
					 
					
						
						
							
							* Update for modified thinc interface  
						
						
						
					 
					
						2016-02-05 11:44:39 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							84b247ef83 
							
						 
					 
					
						
						
							
							* Add a .pipe method, that takes a stream of input, operates on it, and streams the output. Internally, the stream may be buffered, to allow multi-threading.  
						
						
						
					 
					
						2016-02-03 02:10:58 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e3db39dd21 
							
						 
					 
					
						
						
							
							* Fix compiler warning about signed/unsigned comparison  
						
						
						
					 
					
						2016-02-01 09:08:07 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b3802562d6 
							
						 
					 
					
						
						
							
							Merge branch 'rethinc2' of  https://github.com/honnibal/spaCy  into rethinc2  
						
						
						
					 
					
						2016-02-01 08:59:24 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4b08a3fafd 
							
						 
					 
					
						
						
							
							* Fix merge conflict  
						
						
						
					 
					
						2016-02-01 08:58:18 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5188f6d9d8 
							
						 
					 
					
						
						
							
							* Fix parseC function  
						
						
						
					 
					
						2016-02-01 08:48:48 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							bcf8f7ba40 
							
						 
					 
					
						
						
							
							* Add a parse_batch method to Parser, that releases the GIL around a batch of documents.  
						
						
						
					 
					
						2016-02-01 08:34:55 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							d5579cd0d8 
							
						 
					 
					
						
						
							
							Merge branch 'rethinc2' of  https://github.com/honnibal/spaCy  into rethinc2  
						
						
						
					 
					
						2016-02-01 03:08:49 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							490ba65398 
							
						 
					 
					
						
						
							
							* Use openmp in parser  
						
						
						
					 
					
						2016-02-01 03:08:42 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							cb78d91ec5 
							
						 
					 
					
						
						
							
							* Fix ArcEager.set_valid  
						
						
						
					 
					
						2016-02-01 03:07:37 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							28e5ad62bc 
							
						 
					 
					
						
						
							
							* Pass a StateC pointer into the transition and validation methods in the parser, so that the GIL can be released over a batch of documents  
						
						
						
					 
					
						2016-02-01 03:00:15 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a47f00901b 
							
						 
					 
					
						
						
							
							* Pass a StateC pointer into the transition and validation methods in the parser, so that the GIL can be released over a batch of documents  
						
						
						
					 
					
						2016-02-01 02:58:14 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							daaad66448 
							
						 
					 
					
						
						
							
							* Now fully proxied  
						
						
						
					 
					
						2016-02-01 02:37:08 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							7a0e3bb9c1 
							
						 
					 
					
						
						
							
							* Continue proxying. Some problem currently  
						
						
						
					 
					
						2016-02-01 02:22:21 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2169bbb7ea 
							
						 
					 
					
						
						
							
							* Shadow StateClass with StateC, to start proxying  
						
						
						
					 
					
						2016-02-01 01:16:14 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2fa228458e 
							
						 
					 
					
						
						
							
							* Add _state file, which StateClass will proxy to  
						
						
						
					 
					
						2016-02-01 01:09:21 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9410e74c92 
							
						 
					 
					
						
						
							
							* Switch parser to use nogil functions  
						
						
						
					 
					
						2016-01-30 20:27:07 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							10877a7791 
							
						 
					 
					
						
						
							
							* Update for thinc 5.0, including changing cost from int to weight_t, and updating the tagger and parser  
						
						
						
					 
					
						2016-01-30 14:31:36 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							84c5dfbfc3 
							
						 
					 
					
						
						
							
							* Clean up debugging python list  
						
						
						
					 
					
						2016-01-19 20:10:32 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							04d0686b26 
							
						 
					 
					
						
						
							
							* Make TransitionSystem.add_action idempotent, i.e. ignore duplicate added actions.  
						
						
						
					 
					
						2016-01-19 20:10:04 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							65c5bc4988 
							
						 
					 
					
						
						
							
							* Add add_label method, to allow users to register new entity types and dependency labels.  
						
						
						
					 
					
						2016-01-19 19:11:02 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							151aa0b0e2 
							
						 
					 
					
						
						
							
							* Allow users to add_label, in order to extend the entity recogniser to new classes. Does not by itself add a class to the model  
						
						
						
					 
					
						2016-01-19 19:09:33 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c8e0011ebc 
							
						 
					 
					
						
						
							
							* Add iterators to the NER and parser transition systems, to get the action types  
						
						
						
					 
					
						2016-01-19 19:07:43 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							04177debd0 
							
						 
					 
					
						
						
							
							* Unwind limit to sentence boundary detection that prevents it from inserting boundaries on whitespace. Replace it with a check for whitespace in StateClass.fast_forward, so that whitespace is LeftArced when it's on the stack. This should prevent the previous problem of whitespace-only sentences. Should fix Issue  #184 , but may cause further problems. Needs testing.  
						
						
						
					 
					
						2016-01-19 02:54:15 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3dc398b727 
							
						 
					 
					
						
						
							
							* Fix merge conflict in requirements.txt  
						
						
						
					 
					
						2016-01-16 16:20:49 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c025a0c64b 
							
						 
					 
					
						
						
							
							* Check for KeyboardInterrupt in parser.__call__  
						
						
						
					 
					
						2016-01-16 16:18:44 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							aec130af56 
							
						 
					 
					
						
						
							
							Use util.Package class for io  
						
						... 
						
						
						
						Previous Sputnik integration caused API change: Vocab, Tagger, etc
were loaded via a from_package classmethod, that required a
sputnik.Package instance. This forced users to first create a
sputnik.Sputnik() instance, in order to acquire a Package via
sp.pool().
Instead I've created a small file-system shim, util.Package, which
allows classes to have a .load() classmethod, that accepts either
util.Package objects, or strings. We can later gut the internals
of this and make it a proxy for Sputnik if we need more functionality
that should live in the Sputnik library.
Sputnik is now only used to download and install the data, in
spacy.en.download 
						
					 
					
						2015-12-29 18:00:48 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5623242b3e 
							
						 
					 
					
						
						
							
							* Adjust NER rules, so that U entries in gazetteer don't become B moves to the model  
						
						
						
					 
					
						2015-11-12 04:48:23 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							44fbdc7260 
							
						 
					 
					
						
						
							
							* Fix bug in NER transition system, that sometimes left no valid moves  
						
						
						
					 
					
						2015-11-08 16:19:12 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e92371bb54 
							
						 
					 
					
						
						
							
							* Fix rule that made Last action invalid if there was a preset of O, since if the entity is already open, that ship has sailed.  
						
						
						
					 
					
						2015-11-08 22:17:51 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6f47074214 
							
						 
					 
					
						
						
							
							* Make constructor of ParserModel and TaggerModel the same as AveragedPerceptron, for each pickling.  
						
						
						
					 
					
						2015-11-07 18:25:17 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							1cfa20fb17 
							
						 
					 
					
						
						
							
							* Fix sentence-final whitespace issue  
						
						
						
					 
					
						2015-11-07 17:34:46 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							888c05a7fa 
							
						 
					 
					
						
						
							
							* Fix variable naming in StepwiseState, for thinc 4.0  
						
						
						
					 
					
						2015-11-07 11:02:44 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							fc2185bfe3 
							
						 
					 
					
						
						
							
							* Fix variable naming in StepwiseState, for thinc 4.0  
						
						
						
					 
					
						2015-11-07 10:48:31 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							954442a807 
							
						 
					 
					
						
						
							
							* Fix variable naming in StepwiseState, for thinc 4.0  
						
						
						
					 
					
						2015-11-07 10:30:45 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							af70dc166a 
							
						 
					 
					
						
						
							
							* Fix Last restriction, that was supposed to prevent conflicts with presets, but was incorrect.  
						
						
						
					 
					
						2015-11-07 09:52:00 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a06e3c8963 
							
						 
					 
					
						
						
							
							* Fix bone-headed mistake in StateClass.E  
						
						
						
					 
					
						2015-11-07 07:35:28 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							d24b8509e4 
							
						 
					 
					
						
						
							
							* Correct screw ups from the previous commits  
						
						
						
					 
					
						2015-11-07 06:51:41 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5efad178b5 
							
						 
					 
					
						
						
							
							* Set ent tag when close entity  
						
						
						
					 
					
						2015-11-07 06:09:25 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9285f01d26 
							
						 
					 
					
						
						
							
							* Fix broken StateClass.E tracking  
						
						
						
					 
					
						2015-11-07 06:06:39 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							19136b0e7d 
							
						 
					 
					
						
						
							
							* Add better debug message for illegal move  
						
						
						
					 
					
						2015-11-07 05:34:37 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2733816b7b 
							
						 
					 
					
						
						
							
							* Fix whitespace  
						
						
						
					 
					
						2015-11-07 05:31:06 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							01ab464383 
							
						 
					 
					
						
						
							
							* Prevent Begin and In moves from applying in NER if we're at the last token of a sentence, as this would mean the entity would span over a sentence boundary. Re Issue  #169  
						
						
						
					 
					
						2015-11-07 05:30:44 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b65633f270 
							
						 
					 
					
						
						
							
							* Fix function that returns nth entity in StateClass. Was only returning the first.  
						
						
						
					 
					
						2015-11-07 05:29:11 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3c162dcac3 
							
						 
					 
					
						
						
							
							* Refactor away from the _ml module, to use thinc 4.0. Still some work needs to be done, e.g. to add __reduce__ to the models, more testing, etc.  
						
						
						
					 
					
						2015-11-07 03:24:30 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b9991fbd20 
							
						 
					 
					
						
						
							
							* Update to use thinc 3.0  
						
						
						
					 
					
						2015-11-06 00:25:59 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							68f479e821 
							
						 
					 
					
						
						
							
							* Rename Doc.data to Doc.c  
						
						
						
					 
					
						2015-11-04 00:15:14 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							329ae57520 
							
						 
					 
					
						
						
							
							* Fix whitespace attachment thing  
						
						
						
					 
					
						2015-10-13 09:46:38 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							37919eac82 
							
						 
					 
					
						
						
							
							* Fix whitespace attachment in simpler way. Leaves problem with setting left/right children.  
						
						
						
					 
					
						2015-10-13 18:23:24 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c70eb776ae 
							
						 
					 
					
						
						
							
							* Fix whitespace attachment, so that left/right children are consistent with head.  
						
						
						
					 
					
						2015-10-13 15:58:22 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							20fd36a0f7 
							
						 
					 
					
						
						
							
							* Very scrappy, likely buggy first-cut pickle implementation, to work on Issue  #125 : allow pickle for Apache Spark. The current implementation sends stuff to temp files, and does almost nothing to ensure all modifiable state is actually preserved. The Language() instance is a deep tree of extension objects, and if pickling during training, some of the C-data state is hard to preserve.  
						
						
						
					 
					
						2015-10-13 13:44:41 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9dd2f25c74 
							
						 
					 
					
						
						
							
							* Fix Issue  #131 : Force whitespace characters to attach syntactically to previous token, and ensure they cannot serve as stand-alone 'sentence' units.  
						
						
						
					 
					
						2015-10-10 15:53:30 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8b39feefbe 
							
						 
					 
					
						
						
							
							* Add dependency post-process rule to ensure spaces are attached to neighbouring tokens, so that they can't be sentence boundaries  
						
						
						
					 
					
						2015-10-10 15:32:13 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							0e24d099a1 
							
						 
					 
					
						
						
							
							* Fix L/R edge bug, by ensuring l_edge and r_edge are preset, and fixing the way the edge update in del_arc. Bugs keep arising here because the edges are absolute positions, where everything else is relative. I'm also not 100% convinced that del_arc is handled correctly. Do we need to update the parents?  
						
						
						
					 
					
						2015-09-09 03:40:44 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							86c888667f 
							
						 
					 
					
						
						
							
							* Merge in changes from de branch  
						
						
						
					 
					
						2015-09-06 19:49:28 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5edac11225 
							
						 
					 
					
						
						
							
							* Wrap self.parse in nogil, and break if an invalid move is predicted. The invalid break is a work-around that papers over likely bugs, but we can't easily break in the nogil block, and otherwise we'll get an infinite loop. Need to set this as an error flag.  
						
						
						
					 
					
						2015-09-06 04:15:00 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a3d5e6c0dd 
							
						 
					 
					
						
						
							
							* Reform constructor and save/load workflow in parser model  
						
						
						
					 
					
						2015-08-26 19:19:01 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							bf38b3b883 
							
						 
					 
					
						
						
							
							* Hack on l/r reversal bug  
						
						
						
					 
					
						2015-08-10 05:58:43 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6116413b47 
							
						 
					 
					
						
						
							
							* Fix label prediction in StepwiseState  
						
						
						
					 
					
						2015-08-10 05:05:31 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2c9753eff2 
							
						 
					 
					
						
						
							
							* Whitespace  
						
						
						
					 
					
						2015-08-10 00:09:02 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9de98f5a6f 
							
						 
					 
					
						
						
							
							* Add Parser.stepthrough method, with context manager  
						
						
						
					 
					
						2015-08-10 00:08:46 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							fe43f8cf39 
							
						 
					 
					
						
						
							
							* Whitespace  
						
						
						
					 
					
						2015-08-09 02:31:53 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9c090945e0 
							
						 
					 
					
						
						
							
							* Add Parser.predict method, and clean up Parser.get_state  
						
						
						
					 
					
						2015-08-09 02:29:58 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							04fccfb984 
							
						 
					 
					
						
						
							
							* Fix get_state for parser prediction  
						
						
						
					 
					
						2015-08-09 02:11:22 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							55fde0e240 
							
						 
					 
					
						
						
							
							* Fix get_state  
						
						
						
					 
					
						2015-08-09 01:45:30 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f0f4fa9838 
							
						 
					 
					
						
						
							
							* Fix Parser.get_state  
						
						
						
					 
					
						2015-08-09 01:40:13 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							18331dca89 
							
						 
					 
					
						
						
							
							* Add continue_for argument to parser 'partial' function, which is now renamed to get_state  
						
						
						
					 
					
						2015-08-09 01:31:54 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							0653288fa5 
							
						 
					 
					
						
						
							
							* Fix stateclass.queue  
						
						
						
					 
					
						2015-08-09 00:39:02 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9de218b7ba 
							
						 
					 
					
						
						
							
							* Fix Parser.partial function  
						
						
						
					 
					
						2015-08-08 23:45:18 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							cc9deae960 
							
						 
					 
					
						
						
							
							* Add is_valid method to transition_system  
						
						
						
					 
					
						2015-08-08 23:36:18 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2a46c77324 
							
						 
					 
					
						
						
							
							* Whitespace  
						
						
						
					 
					
						2015-08-08 23:35:59 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							7bafc789e7 
							
						 
					 
					
						
						
							
							* Add stack and queue properties to stateclass, for python access  
						
						
						
					 
					
						2015-08-08 23:32:42 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3af938365f 
							
						 
					 
					
						
						
							
							* Add function partial to Parser  
						
						
						
					 
					
						2015-08-08 23:32:15 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							76a1f0481a 
							
						 
					 
					
						
						
							
							* Whitespace  
						
						
						
					 
					
						2015-08-08 23:31:54 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							59c3bf60a6 
							
						 
					 
					
						
						
							
							* Ensure entity recognizer doesn't over-write preset types  
						
						
						
					 
					
						2015-08-06 16:09:08 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9c1724ecae 
							
						 
					 
					
						
						
							
							* Gazetteer stuff working, now need to wire up to API  
						
						
						
					 
					
						2015-08-06 00:35:40 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a8bbd7312c 
							
						 
					 
					
						
						
							
							* Hackishly patch long dependencies problem  
						
						
						
					 
					
						2015-07-28 00:14:29 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							bb583f7f09 
							
						 
					 
					
						
						
							
							* Hackishly patch long dependencies problem  
						
						
						
					 
					
						2015-07-27 23:14:33 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							823ef4a00b 
							
						 
					 
					
						
						
							
							* Remove profile declarations  
						
						
						
					 
					
						2015-07-25 18:13:06 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							aa28e2e01d 
							
						 
					 
					
						
						
							
							* Release the GIL around parse function  
						
						
						
					 
					
						2015-07-24 04:53:27 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							d5255aad77 
							
						 
					 
					
						
						
							
							* Update freqs for missing tags in ner, for serializer  
						
						
						
					 
					
						2015-07-23 01:17:11 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							12699a1152 
							
						 
					 
					
						
						
							
							* Set initial freqs, to avoid missing values in serializer  
						
						
						
					 
					
						2015-07-23 01:16:27 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							317cbbc015 
							
						 
					 
					
						
						
							
							* Serialization round trip now working with decent API, but with rough spots in the organisation and requiring vocabulary to be fixed ahead of time.  
						
						
						
					 
					
						2015-07-19 15:18:17 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b1d74ce60d 
							
						 
					 
					
						
						
							
							* Remove unused joint.pyx and joint.pxd files  
						
						
						
					 
					
						2015-07-17 23:31:44 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							fb0a641a2d 
							
						 
					 
					
						
						
							
							* Don't release the gil around Parser.parse. Does this indicate thread problems?  
						
						
						
					 
					
						2015-07-17 23:07:37 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e29daea85f 
							
						 
					 
					
						
						
							
							* Fix bint/int typing problem in TransitionSystem. In C++ bint* means bool*, but in C it means int*. So, type-casting to bint* is unsafe.  
						
						
						
					 
					
						2015-07-17 22:37:24 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							45ae1ce428 
							
						 
					 
					
						
						
							
							* Remove unused declaration in parser  
						
						
						
					 
					
						2015-07-16 01:27:11 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9a8db9743c 
							
						 
					 
					
						
						
							
							* Remove gil from parser.call  
						
						
						
					 
					
						2015-07-14 23:47:33 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							38ca0c33f5 
							
						 
					 
					
						
						
							
							Merge branch 'neuralnet' into refactor  
						
						... 
						
						
						
						Mostly refactors parser, to use new thinc3.2 Example class.
Aim is to remove use of shared memory, so that we can parallelize
over documents easily.
Conflicts:
	setup.py
	spacy/syntax/parser.pxd
	spacy/syntax/parser.pyx
	spacy/syntax/stateclass.pyx 
						
					 
					
						2015-07-14 14:13:47 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6eef0bf9ab 
							
						 
					 
					
						
						
							
							* Break up tokens.pyx into tokens/doc.pyx, tokens/token.pyx, tokens/spans.pyx  
						
						
						
					 
					
						2015-07-13 20:20:58 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							55f1042443 
							
						 
					 
					
						
						
							
							* Improve efficiency of L and R features, correcting the non-linear-in-length problem.  
						
						
						
					 
					
						2015-07-09 12:17:26 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							70d2acb579 
							
						 
					 
					
						
						
							
							* Fix edge features  
						
						
						
					 
					
						2015-07-09 12:15:01 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							adb868bdad 
							
						 
					 
					
						
						
							
							* Add warning for models not found in parser  
						
						
						
					 
					
						2015-07-08 20:04:55 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							05b28ec9eb 
							
						 
					 
					
						
						
							
							* Add warning for models not found in parser  
						
						
						
					 
					
						2015-07-08 20:02:13 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ef700401a6 
							
						 
					 
					
						
						
							
							* Add warning for models not found in parser  
						
						
						
					 
					
						2015-07-08 20:00:46 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6218d8b389 
							
						 
					 
					
						
						
							
							* Add warning for models not found in parser  
						
						
						
					 
					
						2015-07-08 19:59:16 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f6a6c39ce8 
							
						 
					 
					
						
						
							
							* Add warning for models not found in parser  
						
						
						
					 
					
						2015-07-08 19:52:30 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							0ceb1f71c2 
							
						 
					 
					
						
						
							
							* Update parse features  
						
						
						
					 
					
						2015-07-08 19:11:36 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							bb522496dd 
							
						 
					 
					
						
						
							
							* Rename Tokens to Doc  
						
						
						
					 
					
						2015-07-08 18:53:00 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ff885e8511 
							
						 
					 
					
						
						
							
							* Add ParserFactory convenience function  
						
						
						
					 
					
						2015-07-08 12:35:46 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							52fd80c6c6 
							
						 
					 
					
						
						
							
							* Add experimental supersense features for parsing, based on lookup into wordnet.  
						
						
						
					 
					
						2015-07-01 20:12:44 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e20106fdff 
							
						 
					 
					
						
						
							
							* Begin reorganizing neuralnet work  
						
						
						
					 
					
						2015-06-30 14:26:32 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3bb5876c5a 
							
						 
					 
					
						
						
							
							* Inline methods in StateClass  
						
						
						
					 
					
						2015-06-29 01:10:14 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							313a7f87b3 
							
						 
					 
					
						
						
							
							* Inline methods in StateClass  
						
						
						
					 
					
						2015-06-29 01:06:28 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a02fd3af5d 
							
						 
					 
					
						
						
							
							* Check valency in L and R feature methods, to make feaure calculation faster  
						
						
						
					 
					
						2015-06-29 00:27:56 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5d870720bc 
							
						 
					 
					
						
						
							
							* Check valency in L and R feature methods, to make feaure calculation faster  
						
						
						
					 
					
						2015-06-29 00:17:29 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f4986d5d3c 
							
						 
					 
					
						
						
							
							* Use new Example class  
						
						
						
					 
					
						2015-06-28 22:36:03 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							735f1af91f 
							
						 
					 
					
						
						
							
							* Fix neural net stuff  
						
						
						
					 
					
						2015-06-28 11:44:58 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e7003f1cf3 
							
						 
					 
					
						
						
							
							* Remove hard-coding of vector lengths  
						
						
						
					 
					
						2015-06-28 11:37:17 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							897dd0dd0b 
							
						 
					 
					
						
						
							
							* Merge changes, and adjust Example to use memoryview  
						
						
						
					 
					
						2015-06-28 11:36:11 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9282a8e72c 
							
						 
					 
					
						
						
							
							* Prepare for new models to be plugged in by using Example class  
						
						
						
					 
					
						2015-06-28 11:02:35 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							75aeccc064 
							
						 
					 
					
						
						
							
							* Rejig parser interface to use new thinc.api.Example class, in prep of theano model. Comment out beam search  
						
						
						
					 
					
						2015-06-28 11:02:34 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							bbef71f213 
							
						 
					 
					
						
						
							
							* Fix min function in fill_context  
						
						
						
					 
					
						2015-06-28 10:46:39 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							142b6f9510 
							
						 
					 
					
						
						
							
							* Revert last changes  
						
						
						
					 
					
						2015-06-28 10:44:28 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b06962f18b 
							
						 
					 
					
						
						
							
							* Pad buffers in state  
						
						
						
					 
					
						2015-06-28 10:36:14 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							53be72387c 
							
						 
					 
					
						
						
							
							* Hack at fill_context to investigate performance loss  
						
						
						
					 
					
						2015-06-28 10:34:28 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							71a4e876a9 
							
						 
					 
					
						
						
							
							* Fix parse features  
						
						
						
					 
					
						2015-06-28 09:27:33 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5af500909c 
							
						 
					 
					
						
						
							
							* Remove unused directve from parser.pyx  
						
						
						
					 
					
						2015-06-28 06:20:21 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							d5b4090705 
							
						 
					 
					
						
						
							
							* Add profile directive  
						
						
						
					 
					
						2015-06-28 06:19:33 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2b5421e60c 
							
						 
					 
					
						
						
							
							* Add profile directive  
						
						
						
					 
					
						2015-06-28 06:07:04 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8b5de4a411 
							
						 
					 
					
						
						
							
							* Add word / tag / label sets, for use in neural net  
						
						
						
					 
					
						2015-06-28 05:46:53 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ed40a8380e 
							
						 
					 
					
						
						
							
							* Remove hard-coding of vector lengths  
						
						
						
					 
					
						2015-06-27 04:18:47 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ebe630cc8d 
							
						 
					 
					
						
						
							
							* Enable more features for NN  
						
						
						
					 
					
						2015-06-27 04:17:29 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f8bb43475e 
							
						 
					 
					
						
						
							
							* Bridge to Theano working. Very disorganised. Using thinc adb60aba966ed2  
						
						
						
					 
					
						2015-06-27 02:39:18 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2fe98b8a9a 
							
						 
					 
					
						
						
							
							* Prepare for new models to be plugged in by using Example class  
						
						
						
					 
					
						2015-06-26 13:51:39 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6896455884 
							
						 
					 
					
						
						
							
							* Rejig parser interface to use new thinc.api.Example class, in prep of theano model. Comment out beam search  
						
						
						
					 
					
						2015-06-26 06:25:36 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							02b171ee67 
							
						 
					 
					
						
						
							
							* Bug fixes to edge calculation  
						
						
						
					 
					
						2015-06-24 04:28:02 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							7f9384f53c 
							
						 
					 
					
						
						
							
							* Remove deprecated _state module  
						
						
						
					 
					
						2015-06-23 17:28:24 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6dbe182491 
							
						 
					 
					
						
						
							
							* Fix merge conflicts  
						
						
						
					 
					
						2015-06-23 17:28:00 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							579735a095 
							
						 
					 
					
						
						
							
							* Remove import of _state module  
						
						
						
					 
					
						2015-06-23 17:25:08 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							88f55d136b 
							
						 
					 
					
						
						
							
							* Remove deprecated _state module  
						
						
						
					 
					
						2015-06-23 17:19:51 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9ab9dd2bf7 
							
						 
					 
					
						
						
							
							* Clean up unused orig_arc_eager and tree_arc_eager modules, which were only added for EMNLP experiments  
						
						
						
					 
					
						2015-06-23 17:17:33 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							7ebfe4b983 
							
						 
					 
					
						
						
							
							* Fixes to edge features  
						
						
						
					 
					
						2015-06-23 16:32:54 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							7b125f5a86 
							
						 
					 
					
						
						
							
							* Fixes to edge features  
						
						
						
					 
					
						2015-06-23 16:31:01 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							35c290bee4 
							
						 
					 
					
						
						
							
							* Fix edge features  
						
						
						
					 
					
						2015-06-23 15:50:56 +02:00