Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							d8aa721664 
							
						 
					 
					
						
						
							
							Compute Language.meta with a property  
						
						
						
					 
					
						2017-07-23 00:50:18 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							baa3d81c35 
							
						 
					 
					
						
						
							
							Add text categorizer to Language  
						
						
						
					 
					
						2017-07-22 01:13:36 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							836bfa2d0f 
							
						 
					 
					
						
						
							
							Add factory for experimental SimilarityHook component  
						
						
						
					 
					
						2017-06-05 15:40:22 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2479cde446 
							
						 
					 
					
						
						
							
							Support disable keyword in Language.__init__  
						
						
						
					 
					
						2017-06-05 13:13:07 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8f8f90b46b 
							
						 
					 
					
						
						
							
							Disable labeller if not parsing  
						
						
						
					 
					
						2017-06-04 20:18:54 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							939e8ed567 
							
						 
					 
					
						
						
							
							Add lookup properties for components in Language  
						
						
						
					 
					
						2017-06-04 15:52:09 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							92ae36f84e 
							
						 
					 
					
						
						
							
							Improve way noun chunks iterator is looked up  
						
						
						
					 
					
						2017-06-04 21:53:39 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							21eef90dbc 
							
						 
					 
					
						
						
							
							Support specifying which GPU  
						
						
						
					 
					
						2017-06-03 16:10:23 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							fea1144e6d 
							
						 
					 
					
						
						
							
							Set max batch size in evaluate  
						
						
						
					 
					
						2017-06-03 13:31:33 -05:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							a3e4f91f4a 
							
						 
					 
					
						
						
							
							Only load vocab if it exists  
						
						
						
					 
					
						2017-06-01 14:38:35 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							33e5ec737f 
							
						 
					 
					
						
						
							
							Fix to/from disk methods  
						
						
						
					 
					
						2017-05-31 13:43:10 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							1e6df0a2a1 
							
						 
					 
					
						
						
							
							Merge branch 'develop' of  https://github.com/explosion/spaCy  into develop  
						
						
						
					 
					
						2017-05-29 14:30:12 -05:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							6145fe6a93 
							
						 
					 
					
						
						
							
							Catch all kwargs on Language  
						
						
						
					 
					
						2017-05-29 20:43:48 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9c9ee24411 
							
						 
					 
					
						
						
							
							Fix broken lambda scoping in Python 2  
						
						
						
					 
					
						2017-05-29 13:23:28 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							aa4c33914b 
							
						 
					 
					
						
						
							
							Work on serialization  
						
						
						
					 
					
						2017-05-29 08:40:45 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							7b06bb896e 
							
						 
					 
					
						
						
							
							Fix for serialization  
						
						
						
					 
					
						2017-05-29 13:42:55 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							74235587ef 
							
						 
					 
					
						
						
							
							Fix to serialization  
						
						
						
					 
					
						2017-05-29 13:40:31 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							59f355d525 
							
						 
					 
					
						
						
							
							Fixes for serialization  
						
						
						
					 
					
						2017-05-29 13:38:20 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ff26aa6c37 
							
						 
					 
					
						
						
							
							Work on to/from bytes/disk serialization methods  
						
						
						
					 
					
						2017-05-29 11:45:45 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8a24c60c1e 
							
						 
					 
					
						
						
							
							Merge branch 'develop' of  https://github.com/explosion/spaCy  into develop  
						
						
						
					 
					
						2017-05-28 08:12:05 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							bc97bc292c 
							
						 
					 
					
						
						
							
							Fix __call__ method  
						
						
						
					 
					
						2017-05-28 08:11:58 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b082f76494 
							
						 
					 
					
						
						
							
							Randomize pipeline order during training  
						
						
						
					 
					
						2017-05-27 18:32:21 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							73a643d32a 
							
						 
					 
					
						
						
							
							Don't randomise pipeline for training, and don't update if no gradient  
						
						
						
					 
					
						2017-05-27 08:20:13 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8af3100143 
							
						 
					 
					
						
						
							
							Merge branch 'develop' of  https://github.com/explosion/spaCy  into develop  
						
						
						
					 
					
						2017-05-26 11:31:41 -05:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							353f0ef8d7 
							
						 
					 
					
						
						
							
							Use disable argument (list) for serialization  
						
						
						
					 
					
						2017-05-26 12:33:54 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							dbf2a4cf57 
							
						 
					 
					
						
						
							
							Update all models on each epoch  
						
						
						
					 
					
						2017-05-25 19:46:56 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							82b11b0320 
							
						 
					 
					
						
						
							
							Remove print statement  
						
						
						
					 
					
						2017-05-25 17:15:59 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f403c2cd5f 
							
						 
					 
					
						
						
							
							Add env opts for optimizer  
						
						
						
					 
					
						2017-05-25 11:19:26 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8500d9b1da 
							
						 
					 
					
						
						
							
							Only train one task per iter, holding grads  
						
						
						
					 
					
						2017-05-25 06:47:42 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e6cc927ab1 
							
						 
					 
					
						
						
							
							Rearrange multi-task learning  
						
						
						
					 
					
						2017-05-24 20:10:54 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9adfe9e8fc 
							
						 
					 
					
						
						
							
							Don't hold gradient updates in language -- let the parser decide how to batch the updates.  
						
						
						
					 
					
						2017-05-23 04:29:10 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3959d778ac 
							
						 
					 
					
						
						
							
							Revert "Revert "WIP on improving parser efficiency""  
						
						... 
						
						
						
						This reverts commit 532afef4a8 
						
					 
					
						2017-05-23 03:06:53 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							532afef4a8 
							
						 
					 
					
						
						
							
							Revert "WIP on improving parser efficiency"  
						
						... 
						
						
						
						This reverts commit bdaac7ab44 
						
					 
					
						2017-05-23 03:05:25 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							bdaac7ab44 
							
						 
					 
					
						
						
							
							WIP on improving parser efficiency  
						
						
						
					 
					
						2017-05-23 02:59:31 -05:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							54f04a9fe0 
							
						 
					 
					
						
						
							
							Update API docs with changes in spacy.gold and spacy.language  
						
						
						
					 
					
						2017-05-22 12:29:30 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9262fc4829 
							
						 
					 
					
						
						
							
							Fix syntax error  
						
						
						
					 
					
						2017-05-22 05:14:59 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2a5eb9f61e 
							
						 
					 
					
						
						
							
							Make nonproj methods top-level functions, instead of class methods  
						
						
						
					 
					
						2017-05-22 04:51:08 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5738d373d5 
							
						 
					 
					
						
						
							
							Add deprojectivize to pipeline  
						
						
						
					 
					
						2017-05-22 04:51:08 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8d1e64be69 
							
						 
					 
					
						
						
							
							Add experimental NeuralLabeller  
						
						
						
					 
					
						2017-05-22 04:51:08 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5db89053aa 
							
						 
					 
					
						
						
							
							Merge docstrings  
						
						
						
					 
					
						2017-05-21 13:46:23 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							432b3499b3 
							
						 
					 
					
						
						
							
							Fix memory leak  
						
						
						
					 
					
						2017-05-21 13:38:46 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4c9202249d 
							
						 
					 
					
						
						
							
							Refactor training, to fix memory leak  
						
						
						
					 
					
						2017-05-21 09:07:06 -05:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							d82ae9a585 
							
						 
					 
					
						
						
							
							Change "function" to "callable" in docs  
						
						
						
					 
					
						2017-05-21 13:17:40 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3b7c108246 
							
						 
					 
					
						
						
							
							Pass tokvecs through as a list, instead of concatenated. Also fix padding  
						
						
						
					 
					
						2017-05-20 13:23:32 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							66ea9aebe7 
							
						 
					 
					
						
						
							
							Remove the state argument from Language  
						
						
						
					 
					
						2017-05-19 13:25:42 -05:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							2c8c9dc0c9 
							
						 
					 
					
						
						
							
							Update docstrings and API docs for Language  
						
						
						
					 
					
						2017-05-19 18:47:24 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							d42bc16868 
							
						 
					 
					
						
						
							
							Update docstrings and API docs for Language class  
						
						
						
					 
					
						2017-05-18 23:57:38 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c2c825127a 
							
						 
					 
					
						
						
							
							Fix use_params and pipe methods  
						
						
						
					 
					
						2017-05-18 08:30:59 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2713041571 
							
						 
					 
					
						
						
							
							Fix GPU usage in Language  
						
						
						
					 
					
						2017-05-18 04:25:19 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							793430aa7a 
							
						 
					 
					
						
						
							
							Get spaCy train command working with neural network  
						
						... 
						
						
						
						* Integrate models into pipeline
* Add basic serialization (maybe incorrect)
* Fix pickle on vocab 
						
					 
					
						2017-05-17 12:04:50 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8cf097ca88 
							
						 
					 
					
						
						
							
							Redesign training to integrate NN components  
						
						... 
						
						
						
						* Obsolete .parser, .entity etc names in favour of .pipeline
* Components no longer create models on initialization
* Models created by loading method (from_disk(), from_bytes() etc), or
    .begin_training()
* Add .predict(), .set_annotations() methods in components
* Pass state through pipeline, to allow components to share information
    more flexibly. 
						
					 
					
						2017-05-16 16:17:30 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5211645af3 
							
						 
					 
					
						
						
							
							Get data flowing through pipeline. Needs redesign  
						
						
						
					 
					
						2017-05-16 11:21:59 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a9edb3aa1d 
							
						 
					 
					
						
						
							
							Improve integration of NN parser, to support unified training API  
						
						
						
					 
					
						2017-05-15 21:53:27 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9e167b7bb6 
							
						 
					 
					
						
						
							
							Strip serializer from code  
						
						
						
					 
					
						2017-05-09 17:28:50 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							ea5fa46475 
							
						 
					 
					
						
						
							
							Import LEX_ATTRS from lang.lex_attrs  
						
						
						
					 
					
						2017-05-09 00:58:10 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							6eb6306843 
							
						 
					 
					
						
						
							
							Fix language data imports  
						
						
						
					 
					
						2017-05-08 23:58:31 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							d0e19267e8 
							
						 
					 
					
						
						
							
							Create directory if missing in save_to_directory  
						
						
						
					 
					
						2017-04-23 21:24:43 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4d2a659c52 
							
						 
					 
					
						
						
							
							Fix json dump for Python3  
						
						
						
					 
					
						2017-04-23 17:05:53 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							ddd5194088 
							
						 
					 
					
						
						
							
							Update Language docs and docstrings  
						
						
						
					 
					
						2017-04-17 01:52:13 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							f62b740961 
							
						 
					 
					
						
						
							
							Use compat.json_dumps  
						
						
						
					 
					
						2017-04-17 01:46:14 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							8e83f8e2fa 
							
						 
					 
					
						
						
							
							Update docstrings  
						
						
						
					 
					
						2017-04-17 01:40:26 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							e2299dc389 
							
						 
					 
					
						
						
							
							Ensure path in save_to_directory  
						
						
						
					 
					
						2017-04-17 01:40:14 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4efd6fb9d6 
							
						 
					 
					
						
						
							
							Fix training  
						
						
						
					 
					
						2017-04-16 15:28:27 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							89a4f262fc 
							
						 
					 
					
						
						
							
							Fix training methods  
						
						
						
					 
					
						2017-04-16 13:00:37 -05:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							c05ec4b89a 
							
						 
					 
					
						
						
							
							Add compat functions and remove old workarounds  
						
						... 
						
						
						
						Add ensure_path util function to handle checking instance of path 
						
					 
					
						2017-04-15 12:11:16 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							d24589aa72 
							
						 
					 
					
						
						
							
							Clean up imports, unused code, whitespace, docstrings  
						
						
						
					 
					
						2017-04-15 12:05:47 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							561f2a3eb4 
							
						 
					 
					
						
						
							
							Use consistent formatting for docstrings  
						
						
						
					 
					
						2017-04-15 11:59:21 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							33ba5066eb 
							
						 
					 
					
						
						
							
							Refactor Language.end_training, making new save_to_directory method  
						
						
						
					 
					
						2017-04-14 23:51:24 +02:00 
						 
				 
			
				
					
						
							
							
								oeg 
							
						 
					 
					
						
						
						
						
							
						
						
							010293fb2f 
							
						 
					 
					
						
						
							
							fix(typo): Fixes typo in method calling PseudoProjectivity.deprojectivize, failing with new train cli  
						
						
						
					 
					
						2017-04-06 17:33:15 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							47a3ef06a6 
							
						 
					 
					
						
						
							
							Unhack deprojetivization, moving it into pipeline  
						
						... 
						
						
						
						Previously the deprojectivize() call was attached to the transition
system, and only called for German. Instead it should be a separate
process, called after the parser. This makes it available for any
language. Closes  #898 . 
						
					 
					
						2017-03-31 12:31:50 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							83ba6c247c 
							
						 
					 
					
						
						
							
							Fix init of Language without model  
						
						
						
					 
					
						2017-03-26 16:46:00 +02:00 
						 
				 
			
				
					
						
							
							
								Raphaël Bournhonesque 
							
						 
					 
					
						
						
						
						
							
						
						
							f332bf05be 
							
						 
					 
					
						
						
							
							Remove unused import statements  
						
						
						
					 
					
						2017-03-21 21:08:54 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							9605cf39cc 
							
						 
					 
					
						
						
							
							Handle default path in Language classes  
						
						
						
					 
					
						2017-03-18 12:58:45 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8843b84bd1 
							
						 
					 
					
						
						
							
							Merge remote-tracking branch 'origin/develop-downloads'  
						
						
						
					 
					
						2017-03-16 12:00:42 -05:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							618ce3b425 
							
						 
					 
					
						
						
							
							Add .meta to Language object  
						
						... 
						
						
						
						Allows getting the current model's meta data, e.g.:
nlp = spacy.load('my-model')
print(nlp.meta) 
						
					 
					
						2017-03-16 17:14:56 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b382dc902c 
							
						 
					 
					
						
						
							
							Add morph rules in Language  
						
						
						
					 
					
						2017-03-15 09:24:40 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f70be44746 
							
						 
					 
					
						
						
							
							Use lemmatizer in code, not from downloaded model.  
						
						
						
					 
					
						2017-03-15 04:52:50 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f71eeef9bb 
							
						 
					 
					
						
						
							
							Pass path argument to end_training  
						
						
						
					 
					
						2017-03-09 18:42:40 -06:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							cd33b39a04 
							
						 
					 
					
						
						
							
							Fix 2/3 problem for json save/load  
						
						
						
					 
					
						2017-03-08 01:39:13 +01:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							aa876884f0 
							
						 
					 
					
						
						
							
							Revert "Revert "Merge remote-tracking branch 'origin/master'""  
						
						... 
						
						
						
						This reverts commit fb9d3bb022 
						
					 
					
						2017-01-09 13:28:13 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3679fb43a3 
							
						 
					 
					
						
						
							
							Fix loading of lemmatizer  
						
						
						
					 
					
						2016-12-18 17:34:09 +01:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							b11d8cd3db 
							
						 
					 
					
						
						
							
							Merge remote-tracking branch 'origin/organize-language-data' into organize-language-data  
						
						
						
					 
					
						2016-12-18 16:57:12 +01:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							753068f1d5 
							
						 
					 
					
						
						
							
							Use base language data as default  
						
						
						
					 
					
						2016-12-18 16:55:25 +01:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							bcc1d50d09 
							
						 
					 
					
						
						
							
							Remove trailing whitespace  
						
						
						
					 
					
						2016-12-18 16:54:52 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							44f4f008bd 
							
						 
					 
					
						
						
							
							Wire up lemmatizer rules for English  
						
						
						
					 
					
						2016-12-18 15:50:09 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							296d33a4fc 
							
						 
					 
					
						
						
							
							Merge branch 'master' of ssh://github.com/explosion/spaCy  
						
						
						
					 
					
						2016-11-26 12:36:18 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							1f6c37c6f5 
							
						 
					 
					
						
						
							
							Fix create_tokenizer when nlp is None  
						
						
						
					 
					
						2016-11-26 12:36:04 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c7889492f9 
							
						 
					 
					
						
						
							
							Fix model saving error for Python 3  
						
						
						
					 
					
						2016-11-25 18:04:30 -06:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							159e8c46e1 
							
						 
					 
					
						
						
							
							Merge old training fixes with newer state  
						
						
						
					 
					
						2016-11-25 09:16:36 -06:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a2f55e7015 
							
						 
					 
					
						
						
							
							Pass cfg through loading, for training.  
						
						
						
					 
					
						2016-11-25 09:01:20 -06:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							09f68bc641 
							
						 
					 
					
						
						
							
							Fix Issue  #639 : stop words in language class not used. This patch is messy, but it's better not to change too much until the language data loading can be properly refactored.  
						
						
						
					 
					
						2016-11-24 00:13:55 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							48e1dc29d4 
							
						 
					 
					
						
						
							
							Fix default path loading.  
						
						
						
					 
					
						2016-11-23 23:48:55 +01:00 
						 
				 
			
				
					
						
							
							
								ExplodingCabbage 
							
						 
					 
					
						
						
						
						
							
						
						
							6c4f488e89 
							
						 
					 
					
						
						
							
							Fix syntax mistake  
						
						
						
					 
					
						2016-11-23 15:12:45 +00:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							60eb2343ce 
							
						 
					 
					
						
						
							
							Only try to load vectors if they exist.  
						
						
						
					 
					
						2016-11-23 13:50:24 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							618ac36093 
							
						 
					 
					
						
						
							
							Fix use of path argument in Language.__init__. Needs to be keyword arg, not positional.  
						
						
						
					 
					
						2016-11-23 13:26:34 +01:00 
						 
				 
			
				
					
						
							
							
								Mark Amery 
							
						 
					 
					
						
						
						
						
							
						
						
							fbe19680a6 
							
						 
					 
					
						
						
							
							Fix another bug related to Language.__init__'s path parameter  
						
						
						
					 
					
						2016-11-20 20:31:34 +00:00 
						 
				 
			
				
					
						
							
							
								Mark Amery 
							
						 
					 
					
						
						
						
						
							
						
						
							b0a07c21a0 
							
						 
					 
					
						
						
							
							Fix path param of Language.__init__ always being ignored  
						
						... 
						
						
						
						There was an explicitly-declared `path` keyword argument, so 'path'
would never be present in `**overrides`. This line just overwrote
any manually-specified value the user might've passed to the `path`
parameter. 
						
					 
					
						2016-11-20 16:29:57 +00:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							22647c2423 
							
						 
					 
					
						
						
							
							Check that patterns aren't null before compiling regex for tokenizer  
						
						
						
					 
					
						2016-11-02 20:35:29 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f7fee6c24b 
							
						 
					 
					
						
						
							
							Check for class-defined make_docs method before assigning one provided as an argument  
						
						
						
					 
					
						2016-11-02 19:57:13 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b86f8af0c1 
							
						 
					 
					
						
						
							
							Fix doc strings  
						
						
						
					 
					
						2016-11-01 12:25:36 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							cb49189477 
							
						 
					 
					
						
						
							
							Remove dead code  
						
						
						
					 
					
						2016-10-26 13:11:07 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							150e02d72e 
							
						 
					 
					
						
						
							
							Fix Issue  #566  
						
						
						
					 
					
						2016-10-23 20:19:01 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							739213a8af 
							
						 
					 
					
						
						
							
							Fix create_pipeline keyword argument.  
						
						
						
					 
					
						2016-10-23 14:24:16 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5ec32f5d97 
							
						 
					 
					
						
						
							
							Fix loading of GloVe vectors, to address Issue  #541  
						
						
						
					 
					
						2016-10-20 18:27:48 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							d4aaf2752c 
							
						 
					 
					
						
						
							
							Fix issue  #535 : Pipeline elements added even when data not installed.  
						
						
						
					 
					
						2016-10-19 19:55:19 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							1b651db9c5 
							
						 
					 
					
						
						
							
							Fix parser creation in Language class.  
						
						
						
					 
					
						2016-10-18 19:36:44 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							45a6f9b9c7 
							
						 
					 
					
						
						
							
							Fix loading of tagger.  
						
						
						
					 
					
						2016-10-18 19:33:04 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							7d5212f131 
							
						 
					 
					
						
						
							
							Refactor defaults  
						
						
						
					 
					
						2016-10-18 16:18:25 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f787cd29fe 
							
						 
					 
					
						
						
							
							Refactor the pipeline classes to make them more consistent, and remove the redundant blank() constructor.  
						
						
						
					 
					
						2016-10-16 21:34:57 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ca51f3b77e 
							
						 
					 
					
						
						
							
							Use DependencyParser and EntityRecognizer in the Language class.  
						
						
						
					 
					
						2016-10-16 17:58:12 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a81c5a7abf 
							
						 
					 
					
						
						
							
							Fix name of labels keyword to 'actions'.  
						
						
						
					 
					
						2016-10-16 12:00:27 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8a6b35d266 
							
						 
					 
					
						
						
							
							Delay binding in MakeDoc  
						
						
						
					 
					
						2016-10-16 11:41:55 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							08e9134760 
							
						 
					 
					
						
						
							
							Change default value of path to True  
						
						
						
					 
					
						2016-10-15 14:12:54 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6d8cb515ac 
							
						 
					 
					
						
						
							
							Break the tokenization stage out of the pipeline into a function 'make_doc'. This allows all pipeline methods to have the same signature.  
						
						
						
					 
					
						2016-10-14 17:38:29 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							41f88ce938 
							
						 
					 
					
						
						
							
							Fix dep model loading in parser  
						
						
						
					 
					
						2016-10-12 20:26:38 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							0e2bedc373 
							
						 
					 
					
						
						
							
							Fix default labels for parser and NER  
						
						
						
					 
					
						2016-10-12 19:12:40 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							847a4a4182 
							
						 
					 
					
						
						
							
							Refactor Language, dropping Language.blank() method.  
						
						
						
					 
					
						2016-10-12 13:45:58 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ea23b64cc8 
							
						 
					 
					
						
						
							
							Refactor training, with new spacy.train module. Defaults still a little awkward.  
						
						
						
					 
					
						2016-10-09 12:24:24 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							eceeaefe53 
							
						 
					 
					
						
						
							
							Fix defaults for Parser and Entity, adding a blank= argument.  
						
						
						
					 
					
						2016-09-30 19:56:06 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e382e48d9f 
							
						 
					 
					
						
						
							
							Temporarily patch handling of defaul templates for tagger. Need to move these to language_data.  
						
						
						
					 
					
						2016-09-27 13:21:28 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b14b9b096b 
							
						 
					 
					
						
						
							
							Return None if /deps directory not present, instead of trying to load the parser.  
						
						
						
					 
					
						2016-09-26 18:48:03 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							0b2d7ae9d6 
							
						 
					 
					
						
						
							
							Fix Entity creation  
						
						
						
					 
					
						2016-09-26 15:41:22 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2debc4e0a2 
							
						 
					 
					
						
						
							
							Add .blank() method to Parser. Start housing default dep labels and entity types within the Defaults class.  
						
						
						
					 
					
						2016-09-26 11:57:54 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							722199acb8 
							
						 
					 
					
						
						
							
							Add spacy.blank() method, that doesn't load data. Don't try to load data if path is falsey  
						
						
						
					 
					
						2016-09-26 11:07:46 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							7db956133e 
							
						 
					 
					
						
						
							
							Move tokenizer data for German into spacy.de.language_data  
						
						
						
					 
					
						2016-09-25 15:37:33 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							95aaea0d3f 
							
						 
					 
					
						
						
							
							Refactor so that the tokenizer data is read from Python data, rather than from disk  
						
						
						
					 
					
						2016-09-25 14:49:53 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							fd58f7655a 
							
						 
					 
					
						
						
							
							Python 3 compatible basestring  
						
						
						
					 
					
						2016-09-24 22:16:43 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							fd65cf6cbb 
							
						 
					 
					
						
						
							
							Finish refactoring data loading  
						
						
						
					 
					
						2016-09-24 20:26:17 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							83e364188c 
							
						 
					 
					
						
						
							
							Mostly finished loading refactoring. Design is in place, but doesn't work yet.  
						
						
						
					 
					
						2016-09-24 15:42:01 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9dc8043a7e 
							
						 
					 
					
						
						
							
							Refactor Language to use new Defaults class, and work on revised data loading. We're getting rid of sputnik's weird file-system wrapper, and using pathlib.  
						
						
						
					 
					
						2016-09-24 14:08:53 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4d7f5468bb 
							
						 
					 
					
						
						
							
							* Change Language class to use a .pipeline attribute, instead of having the pipeline hard coded  
						
						
						
					 
					
						2016-05-17 16:55:42 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							0f957dd586 
							
						 
					 
					
						
						
							
							Merge branch 'master' of ssh://github.com/honnibal/spaCy  
						
						
						
					 
					
						2016-04-14 10:37:56 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							61d20de35d 
							
						 
					 
					
						
						
							
							* Fix language.py docstring  
						
						
						
					 
					
						2016-04-14 10:36:57 +02:00 
						 
				 
			
				
					
						
							
							
								Henning Peters 
							
						 
					 
					
						
						
						
						
							
						
						
							ff690f76ba 
							
						 
					 
					
						
						
							
							fix loading non-german models  
						
						
						
					 
					
						2016-04-12 16:00:56 +02:00 
						 
				 
			
				
					
						
							
							
								Wolfgang Seeker 
							
						 
					 
					
						
						
						
						
							
						
						
							03fb498dbe 
							
						 
					 
					
						
						
							
							introduce lang field for LexemeC to hold language id  
						
						... 
						
						
						
						put noun_chunk logic into iterators.py for each language separately 
						
					 
					
						2016-03-10 13:01:34 +01:00 
						 
				 
			
				
					
						
							
							
								Wolfgang Seeker 
							
						 
					 
					
						
						
						
						
							
						
						
							bc9c62e279 
							
						 
					 
					
						
						
							
							replace Language functions with corresponding orth functions  
						
						... 
						
						
						
						implement punctuation functions in orth 
						
					 
					
						2016-03-09 18:07:37 +01:00 
						 
				 
			
				
					
						
							
							
								Henning Peters 
							
						 
					 
					
						
						
						
						
							
						
						
							931c07a609 
							
						 
					 
					
						
						
							
							initial proposal for separate vector package  
						
						
						
					 
					
						2016-03-04 11:09:06 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a95974ad3f 
							
						 
					 
					
						
						
							
							* Fix oov probability  
						
						
						
					 
					
						2016-02-06 15:13:55 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							1ef84a0557 
							
						 
					 
					
						
						
							
							* Merge master into rethinc2  
						
						
						
					 
					
						2016-02-05 12:55:59 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							249dccbe95 
							
						 
					 
					
						
						
							
							* Fix Language.pipe  
						
						
						
					 
					
						2016-02-05 12:47:57 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							af58f273b3 
							
						 
					 
					
						
						
							
							* Fix spacy.language.pipe  
						
						
						
					 
					
						2016-02-05 12:20:29 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							419edfab50 
							
						 
					 
					
						
						
							
							* Use generic flags for the new attributes until they're added  
						
						
						
					 
					
						2016-02-04 15:50:54 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e5c96c969f 
							
						 
					 
					
						
						
							
							* Wire up new attributes  
						
						
						
					 
					
						2016-02-04 13:04:58 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							84b247ef83 
							
						 
					 
					
						
						
							
							* Add a .pipe method, that takes a stream of input, operates on it, and streams the output. Internally, the stream may be buffered, to allow multi-threading.  
						
						
						
					 
					
						2016-02-03 02:10:58 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							fcfc17a164 
							
						 
					 
					
						
						
							
							Merge branch 'master' into rethinc2  
						
						
						
					 
					
						2016-02-02 23:05:34 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							59123443e2 
							
						 
					 
					
						
						
							
							* Check for presence/absence of the different models in Language.end_training  
						
						
						
					 
					
						2016-02-02 22:49:55 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9e9d4c8706 
							
						 
					 
					
						
						
							
							* Fix stupid error in Language.batch  
						
						
						
					 
					
						2016-02-01 09:49:32 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							98fbdf2856 
							
						 
					 
					
						
						
							
							* Add Language.batch() method, to support multi-threaded jobs  
						
						
						
					 
					
						2016-02-01 09:01:13 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c4a89d56bd 
							
						 
					 
					
						
						
							
							* Automatically register any entity types pre-set on the tokens, so that the NER works with user-given entity types.  
						
						
						
					 
					
						2016-01-19 20:09:26 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							bba0a5e078 
							
						 
					 
					
						
						
							
							* Handle string paths in default_vocab, default_parser, default_entity in Language class  
						
						
						
					 
					
						2016-01-18 22:37:24 +01:00 
						 
				 
			
				
					
						
							
							
								Henning Peters 
							
						 
					 
					
						
						
						
						
							
						
						
							41ea14a56f 
							
						 
					 
					
						
						
							
							fix pickling  
						
						
						
					 
					
						2016-01-16 13:23:11 +01:00 
						 
				 
			
				
					
						
							
							
								Henning Peters 
							
						 
					 
					
						
						
						
						
							
						
						
							235f094534 
							
						 
					 
					
						
						
							
							untangle data_path/via  
						
						
						
					 
					
						2016-01-16 12:23:45 +01:00 
						 
				 
			
				
					
						
							
							
								Henning Peters 
							
						 
					 
					
						
						
						
						
							
						
						
							846fa49b2a 
							
						 
					 
					
						
						
							
							distinct load() and from_package() methods  
						
						
						
					 
					
						2016-01-16 10:00:57 +01:00 
						 
				 
			
				
					
						
							
							
								Henning Peters 
							
						 
					 
					
						
						
						
						
							
						
						
							211913d689 
							
						 
					 
					
						
						
							
							add about.py, adapt setup.py  
						
						
						
					 
					
						2016-01-15 18:57:01 +01:00 
						 
				 
			
				
					
						
							
							
								Henning Peters 
							
						 
					 
					
						
						
						
						
							
						
						
							f8a8f97d25 
							
						 
					 
					
						
						
							
							cleanup  
						
						
						
					 
					
						2016-01-15 18:13:37 +01:00 
						 
				 
			
				
					
						
							
							
								Henning Peters 
							
						 
					 
					
						
						
						
						
							
						
						
							780cb847c9 
							
						 
					 
					
						
						
							
							add default_model to about  
						
						
						
					 
					
						2016-01-15 18:07:15 +01:00 
						 
				 
			
				
					
						
							
							
								Henning Peters 
							
						 
					 
					
						
						
						
						
							
						
						
							788f734513 
							
						 
					 
					
						
						
							
							refactored data_dir->via, add zip_safe, add spacy.load()  
						
						
						
					 
					
						2016-01-15 18:01:02 +01:00 
						 
				 
			
				
					
						
							
							
								Henning Peters 
							
						 
					 
					
						
						
						
						
							
						
						
							bc229790ac 
							
						 
					 
					
						
						
							
							integrate with sputnik  
						
						
						
					 
					
						2016-01-13 19:46:17 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							eaf2ad59f1 
							
						 
					 
					
						
						
							
							* Fix use of mock Package object  
						
						
						
					 
					
						2015-12-31 04:13:15 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a6ba43ecaf 
							
						 
					 
					
						
						
							
							* Fix errors in packaging revision  
						
						
						
					 
					
						2015-12-29 18:37:26 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							aec130af56 
							
						 
					 
					
						
						
							
							Use util.Package class for io  
						
						... 
						
						
						
						Previous Sputnik integration caused API change: Vocab, Tagger, etc
were loaded via a from_package classmethod, that required a
sputnik.Package instance. This forced users to first create a
sputnik.Sputnik() instance, in order to acquire a Package via
sp.pool().
Instead I've created a small file-system shim, util.Package, which
allows classes to have a .load() classmethod, that accepts either
util.Package objects, or strings. We can later gut the internals
of this and make it a proxy for Sputnik if we need more functionality
that should live in the Sputnik library.
Sputnik is now only used to download and install the data, in
spacy.en.download 
						
					 
					
						2015-12-29 18:00:48 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f5dea1406d 
							
						 
					 
					
						
						
							
							* Fix silly mistake in Language.__init__  
						
						
						
					 
					
						2015-12-28 18:48:57 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							187960606f 
							
						 
					 
					
						
						
							
							* Fix pickle problems  
						
						
						
					 
					
						2015-12-28 16:54:03 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8c7e149ec9 
							
						 
					 
					
						
						
							
							* Replace kwargs argument of Language.__init__ with explicit arguments, to fix pickle bug  
						
						
						
					 
					
						2015-12-28 15:56:27 +01:00 
						 
				 
			
				
					
						
							
							
								Henning Peters 
							
						 
					 
					
						
						
						
						
							
						
						
							d8d348bb55 
							
						 
					 
					
						
						
							
							allow to specify version constraint within model name  
						
						
						
					 
					
						2015-12-18 19:12:08 +01:00 
						 
				 
			
				
					
						
							
							
								Henning Peters 
							
						 
					 
					
						
						
						
						
							
						
						
							cfa187aaf0 
							
						 
					 
					
						
						
							
							fix tests  
						
						
						
					 
					
						2015-12-18 10:58:02 +01:00 
						 
				 
			
				
					
						
							
							
								Henning Peters 
							
						 
					 
					
						
						
						
						
							
						
						
							8359bd4d93 
							
						 
					 
					
						
						
							
							strip data/ from package, friendlier Language invocation, make data_dir backward/forward-compatible  
						
						
						
					 
					
						2015-12-18 09:52:55 +01:00 
						 
				 
			
				
					
						
							
							
								Henning Peters 
							
						 
					 
					
						
						
						
						
							
						
						
							345dda6f53 
							
						 
					 
					
						
						
							
							small fixes, add package build step  
						
						
						
					 
					
						2015-12-07 06:50:26 +01:00 
						 
				 
			
				
					
						
							
							
								Henning Peters 
							
						 
					 
					
						
						
						
						
							
						
						
							9027cef3bc 
							
						 
					 
					
						
						
							
							access model via sputnik  
						
						
						
					 
					
						2015-12-07 06:01:28 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3c162dcac3 
							
						 
					 
					
						
						
							
							* Refactor away from the _ml module, to use thinc 4.0. Still some work needs to be done, e.g. to add __reduce__ to the models, more testing, etc.  
						
						
						
					 
					
						2015-11-07 03:24:30 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							adc7bbd6cf 
							
						 
					 
					
						
						
							
							* Fix name of like_num in default_lex_attrs  
						
						
						
					 
					
						2015-11-04 22:02:47 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e96faf29e7 
							
						 
					 
					
						
						
							
							* Rename like_number to like_num, to fix inconsistency re Issue  #166  
						
						
						
					 
					
						2015-11-04 22:01:44 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f18fd8c659 
							
						 
					 
					
						
						
							
							* Fix language.py for change in StringStore load API  
						
						
						
					 
					
						2015-10-23 03:48:12 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2348a08481 
							
						 
					 
					
						
						
							
							* Load/dump strings with a json file, instead of the hacky strings file we were using.  
						
						
						
					 
					
						2015-10-22 21:13:03 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9baf0abd59 
							
						 
					 
					
						
						
							
							* Save vocab after training.  
						
						
						
					 
					
						2015-10-22 21:09:14 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							20fd36a0f7 
							
						 
					 
					
						
						
							
							* Very scrappy, likely buggy first-cut pickle implementation, to work on Issue  #125 : allow pickle for Apache Spark. The current implementation sends stuff to temp files, and does almost nothing to ensure all modifiable state is actually preserved. The Language() instance is a deep tree of extension objects, and if pickling during training, some of the C-data state is hard to preserve.  
						
						
						
					 
					
						2015-10-13 13:44:41 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a6ced80c0c 
							
						 
					 
					
						
						
							
							* Fix Issue  #116 : Misleading handling of True value in Language.__init__.  
						
						
						
					 
					
						2015-09-29 20:54:12 +10:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							27f988b167 
							
						 
					 
					
						
						
							
							* Remove the vectors option to Vocab, preferring to either load vectors from disk, or set them on the Lexeme objects.  
						
						
						
					 
					
						2015-09-15 14:41:48 +10:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e13e47e9e5 
							
						 
					 
					
						
						
							
							* Add English stop words  
						
						
						
					 
					
						2015-09-14 17:48:51 +10:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							d9f1fc2112 
							
						 
					 
					
						
						
							
							* Add deprecation warning for unused load_vectors argument.  
						
						
						
					 
					
						2015-09-09 14:31:09 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							534e3dda3c 
							
						 
					 
					
						
						
							
							* More work on language independent parsing  
						
						
						
					 
					
						2015-08-28 03:44:54 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c2307fa9ee 
							
						 
					 
					
						
						
							
							* More work on language-generic parsing  
						
						
						
					 
					
						2015-08-28 02:02:33 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							0af139e183 
							
						 
					 
					
						
						
							
							* Tagger training now working. Still need to test load/save of model. Morphology still broken.  
						
						
						
					 
					
						2015-08-27 09:16:11 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							76996f4145 
							
						 
					 
					
						
						
							
							* Hack on generic Language class. Still needs work for morphology, defaults, etc  
						
						
						
					 
					
						2015-08-26 19:16:09 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f2f699ac18 
							
						 
					 
					
						
						
							
							* Add language base class  
						
						
						
					 
					
						2015-08-25 15:37:17 +02:00