Henning Peters 
							
						 
					 
					
						
						
						
						
							
						
						
							7627969aba 
							
						 
					 
					
						
						
							
							refactor, listen on setup.py, *.pxd  
						
						
						
					 
					
						2016-02-05 15:37:00 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5dc6cffc67 
							
						 
					 
					
						
						
							
							* Fix gather_freqs.py  
						
						
						
					 
					
						2016-02-04 20:21:58 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e2ed6251d7 
							
						 
					 
					
						
						
							
							* Fancy up the CLI for the conll train script  
						
						
						
					 
					
						2016-02-02 22:58:06 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a676d66807 
							
						 
					 
					
						
						
							
							* Update the CoNLL train script, to get working on other languages  
						
						
						
					 
					
						2016-02-02 22:29:34 +01:00 
						 
				 
			
				
					
						
							
							
								Henning Peters 
							
						 
					 
					
						
						
						
						
							
						
						
							73674a4afb 
							
						 
					 
					
						
						
							
							try using system-wide headers  
						
						
						
					 
					
						2015-12-13 12:51:23 +01:00 
						 
				 
			
				
					
						
							
							
								Henning Peters 
							
						 
					 
					
						
						
						
						
							
						
						
							92fabd0114 
							
						 
					 
					
						
						
							
							wrap virtualenv around cythonize  
						
						
						
					 
					
						2015-12-13 12:32:22 +01:00 
						 
				 
			
				
					
						
							
							
								Henning Peters 
							
						 
					 
					
						
						
						
						
							
						
						
							9662cf04c9 
							
						 
					 
					
						
						
							
							new approach to dependency headers  
						
						
						
					 
					
						2015-12-13 11:53:02 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6e68b344c1 
							
						 
					 
					
						
						
							
							* Train after parsing, not before.  
						
						
						
					 
					
						2015-11-12 04:43:52 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4fb038a9eb 
							
						 
					 
					
						
						
							
							* Update conll_train.py script for spaCy v0.97  
						
						
						
					 
					
						2015-10-31 00:53:51 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							cfaa4bde5d 
							
						 
					 
					
						
						
							
							* Add train and parse scripts that use CoNLL formatted data  
						
						
						
					 
					
						2015-10-30 12:54:49 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2348a08481 
							
						 
					 
					
						
						
							
							* Load/dump strings with a json file, instead of the hacky strings file we were using.  
						
						
						
					 
					
						2015-10-22 21:13:03 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							0ce12e4548 
							
						 
					 
					
						
						
							
							* Import io in get_freqs  
						
						
						
					 
					
						2015-10-19 12:56:18 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							17fffb4c57 
							
						 
					 
					
						
						
							
							* Update get_freqs.py script  
						
						
						
					 
					
						2015-10-16 04:33:49 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5ff4454177 
							
						 
					 
					
						
						
							
							* Update get_freqs.py script  
						
						
						
					 
					
						2015-10-16 04:31:15 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a748146dd3 
							
						 
					 
					
						
						
							
							* Update get_freqs.py script  
						
						
						
					 
					
						2015-10-16 04:24:50 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a29fd79fbc 
							
						 
					 
					
						
						
							
							* Update get_freqs.py script  
						
						
						
					 
					
						2015-10-16 04:24:08 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e08a4b46a2 
							
						 
					 
					
						
						
							
							* Update get_freqs.py script  
						
						
						
					 
					
						2015-10-16 04:20:35 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							92f750cf8b 
							
						 
					 
					
						
						
							
							* Use a gzipped frequencies file in init_model  
						
						
						
					 
					
						2015-10-11 06:59:44 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							064bd69ad0 
							
						 
					 
					
						
						
							
							* Refactor symbols, so that frequency rank can be derived from the orth id of a word.  
						
						
						
					 
					
						2015-10-10 16:03:48 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							83dccf0fd7 
							
						 
					 
					
						
						
							
							* Use io module insteads of deprecated codecs module  
						
						
						
					 
					
						2015-10-10 14:13:01 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f35632e2e5 
							
						 
					 
					
						
						
							
							* Remove SBD print statement in train, after SBD evaluation was removed from Scorer  
						
						
						
					 
					
						2015-10-09 11:08:58 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6ea1601e93 
							
						 
					 
					
						
						
							
							* Add script to train models off the UD treebanks. Note that the UD data is restricted to research purposes only, and should only be used to train models for academic experiments.  
						
						
						
					 
					
						2015-10-08 12:01:08 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c503654ec1 
							
						 
					 
					
						
						
							
							* Update bin/parser/train for printing output.  
						
						
						
					 
					
						2015-10-06 10:35:22 +11:00 
						 
				 
			
				
					
						
							
							
								alvations 
							
						 
					 
					
						
						
						
						
							
						
						
							8caedba42a 
							
						 
					 
					
						
						
							
							caught more codecs.open -> io.open  
						
						
						
					 
					
						2015-09-30 20:20:09 +02:00 
						 
				 
			
				
					
						
							
							
								alvations 
							
						 
					 
					
						
						
						
						
							
						
						
							764bdc62e7 
							
						 
					 
					
						
						
							
							caught another codecs.open  
						
						
						
					 
					
						2015-09-30 20:16:52 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							1ae55cb63a 
							
						 
					 
					
						
						
							
							* Copy tag_map.json in init_model  
						
						
						
					 
					
						2015-09-12 05:54:02 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b2e82e55f6 
							
						 
					 
					
						
						
							
							* Create POS model dir in training script  
						
						
						
					 
					
						2015-09-08 15:36:23 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5ad4527c42 
							
						 
					 
					
						
						
							
							* Rename Deutsch to German  
						
						
						
					 
					
						2015-09-06 20:18:58 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							d1eea2d865 
							
						 
					 
					
						
						
							
							* Update train.py for language-generic spaCy  
						
						
						
					 
					
						2015-09-06 17:51:48 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							950ce36660 
							
						 
					 
					
						
						
							
							* Update init model  
						
						
						
					 
					
						2015-09-06 17:51:30 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b6b1e1aa12 
							
						 
					 
					
						
						
							
							* Add link for Finnish model  
						
						
						
					 
					
						2015-08-27 10:26:02 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							320ced276a 
							
						 
					 
					
						
						
							
							* Add tagger training script  
						
						
						
					 
					
						2015-08-27 09:15:41 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							dc13edd7cb 
							
						 
					 
					
						
						
							
							* Refactor init_model to accomodate other languages  
						
						
						
					 
					
						2015-08-26 19:14:05 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							bbf07ac253 
							
						 
					 
					
						
						
							
							* Cut down init_model to work on more languages  
						
						
						
					 
					
						2015-08-24 01:05:20 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3ecacb9635 
							
						 
					 
					
						
						
							
							* Copy gazetteer file in init_model  
						
						
						
					 
					
						2015-08-06 16:07:23 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ddc1a5cfe5 
							
						 
					 
					
						
						
							
							* Fix training under python3  
						
						
						
					 
					
						2015-07-28 14:09:30 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							174ed1ad20 
							
						 
					 
					
						
						
							
							* Tighten the frequency filter in init_model  
						
						
						
					 
					
						2015-07-27 21:44:51 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6047f2aa35 
							
						 
					 
					
						
						
							
							* Fix path to freqs.txt  
						
						
						
					 
					
						2015-07-27 02:22:35 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							0368889d6c 
							
						 
					 
					
						
						
							
							* Support gzipped frequencies in init_model  
						
						
						
					 
					
						2015-07-26 22:39:22 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c4f20847da 
							
						 
					 
					
						
						
							
							* Fix init_model for travis tests  
						
						
						
					 
					
						2015-07-26 14:03:30 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							09312b9353 
							
						 
					 
					
						
						
							
							* Fix init_model for travis tests  
						
						
						
					 
					
						2015-07-26 13:55:47 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							90ad717dc4 
							
						 
					 
					
						
						
							
							* Update default freq thresholds in init_model  
						
						
						
					 
					
						2015-07-26 01:41:17 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6a5e035a48 
							
						 
					 
					
						
						
							
							* Ensure data files are copied for tokenizer in init_model  
						
						
						
					 
					
						2015-07-26 01:36:19 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ab93898ac6 
							
						 
					 
					
						
						
							
							* Make heuristics more explicit in init_model  
						
						
						
					 
					
						2015-07-26 00:22:19 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5c04dcd7c1 
							
						 
					 
					
						
						
							
							* Fix init_model  
						
						
						
					 
					
						2015-07-25 23:33:02 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							fd525f0675 
							
						 
					 
					
						
						
							
							* Pass OOV probability around  
						
						
						
					 
					
						2015-07-25 23:29:51 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5b6bf4d4a6 
							
						 
					 
					
						
						
							
							* Remove probability cap on lexicon  
						
						
						
					 
					
						2015-07-25 23:05:51 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c62eb110c0 
							
						 
					 
					
						
						
							
							* Fix merge conflict in init_model  
						
						
						
					 
					
						2015-07-25 23:04:30 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							0301472d15 
							
						 
					 
					
						
						
							
							* Fix init_model  
						
						
						
					 
					
						2015-07-25 22:56:35 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8e800adfbc 
							
						 
					 
					
						
						
							
							* Fix init_model  
						
						
						
					 
					
						2015-07-25 22:54:08 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5f183098e4 
							
						 
					 
					
						
						
							
							Merge branch 'master' of ssh://github.com/honnibal/spaCy  
						
						
						
					 
					
						2015-07-25 22:37:04 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6076213c16 
							
						 
					 
					
						
						
							
							* Fix init_model script  
						
						
						
					 
					
						2015-07-25 22:35:52 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							1a99eb69da 
							
						 
					 
					
						
						
							
							Merge branch 'master' of  https://github.com/honnibal/spaCy  
						
						
						
					 
					
						2015-07-25 22:19:48 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ef448649b3 
							
						 
					 
					
						
						
							
							* Add read_freqs function in init_model  
						
						
						
					 
					
						2015-07-25 22:16:36 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2e6a60eaec 
							
						 
					 
					
						
						
							
							Merge branch 'master' of  https://github.com/honnibal/spaCy  
						
						
						
					 
					
						2015-07-25 21:14:07 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							105305b4aa 
							
						 
					 
					
						
						
							
							* Upd get_freqs script  
						
						
						
					 
					
						2015-07-25 21:13:41 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							616445e027 
							
						 
					 
					
						
						
							
							* Add simple script to collate frequencies from sorted file  
						
						
						
					 
					
						2015-07-25 21:12:45 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c52179f5fa 
							
						 
					 
					
						
						
							
							* Use print function in train.py, for py 2/3 compatibility  
						
						
						
					 
					
						2015-07-24 04:52:35 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6be3ee311c 
							
						 
					 
					
						
						
							
							Py3 compatibility tweak  
						
						
						
					 
					
						2015-07-23 13:13:15 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							d4407d8e2f 
							
						 
					 
					
						
						
							
							Py3 compatibility tweak  
						
						
						
					 
					
						2015-07-23 09:45:15 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							da4821fc14 
							
						 
					 
					
						
						
							
							* Add cluster words to probs in init_model  
						
						
						
					 
					
						2015-07-23 09:27:07 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4af2595d99 
							
						 
					 
					
						
						
							
							* Fix structure of wordnet directory for init_model  
						
						
						
					 
					
						2015-07-23 06:35:38 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							83c0f0da22 
							
						 
					 
					
						
						
							
							* Remove lemmatizer from init_model  
						
						
						
					 
					
						2015-07-23 02:32:34 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4729200dfc 
							
						 
					 
					
						
						
							
							* Whitespace  
						
						
						
					 
					
						2015-07-23 01:19:26 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2b7bd46508 
							
						 
					 
					
						
						
							
							* Update get_freqs script  
						
						
						
					 
					
						2015-07-22 15:43:06 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							386246db5b 
							
						 
					 
					
						
						
							
							* Update init_model, making language resources optional  
						
						
						
					 
					
						2015-07-22 00:25:14 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							317cbbc015 
							
						 
					 
					
						
						
							
							* Serialization round trip now working with decent API, but with rough spots in the organisation and requiring vocabulary to be fixed ahead of time.  
						
						
						
					 
					
						2015-07-19 15:18:17 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a6ff7e6ca4 
							
						 
					 
					
						
						
							
							* Fix redundant options in train.py  
						
						
						
					 
					
						2015-07-17 22:38:05 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6cfa83157e 
							
						 
					 
					
						
						
							
							Merge branch 'refactor' of ssh://github.com/honnibal/spaCy into refactor  
						
						
						
					 
					
						2015-07-17 21:38:04 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							38ca0c33f5 
							
						 
					 
					
						
						
							
							Merge branch 'neuralnet' into refactor  
						
						... 
						
						
						
						Mostly refactors parser, to use new thinc3.2 Example class.
Aim is to remove use of shared memory, so that we can parallelize
over documents easily.
Conflicts:
	setup.py
	spacy/syntax/parser.pxd
	spacy/syntax/parser.pyx
	spacy/syntax/stateclass.pyx 
						
					 
					
						2015-07-14 14:13:47 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							af54d05d60 
							
						 
					 
					
						
						
							
							* Remove sense stuff from init_model  
						
						
						
					 
					
						2015-07-14 10:56:17 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3de1b3ef1d 
							
						 
					 
					
						
						
							
							* Change get_freqs to take a list of files  
						
						
						
					 
					
						2015-07-14 10:55:56 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							39c93116eb 
							
						 
					 
					
						
						
							
							* Add get_freqs script  
						
						
						
					 
					
						2015-07-14 02:31:32 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							62cfcd76fe 
							
						 
					 
					
						
						
							
							* Add supersense sets to lexemes, from WordNet. Look-up via lemmatization.  
						
						
						
					 
					
						2015-07-01 18:48:59 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							31b5e58aeb 
							
						 
					 
					
						
						
							
							* Begin reorganizing neuralnet work  
						
						
						
					 
					
						2015-06-30 14:26:53 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							1135cfe50a 
							
						 
					 
					
						
						
							
							* Tidy nn_train a bit  
						
						
						
					 
					
						2015-06-29 16:45:14 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							df8179ca4f 
							
						 
					 
					
						
						
							
							* Add separate Param and AdadeltaParam classes. AdadeltaParam seems broken.  
						
						
						
					 
					
						2015-06-29 16:39:16 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							1dff04acb5 
							
						 
					 
					
						
						
							
							* Apply regularization to the softmax, not the bias  
						
						
						
					 
					
						2015-06-29 11:45:38 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ca30fe1582 
							
						 
					 
					
						
						
							
							* Use He initialization trick  
						
						
						
					 
					
						2015-06-29 10:56:02 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							fc34e1b6e4 
							
						 
					 
					
						
						
							
							* Move Theano functions into nn_train.py script  
						
						
						
					 
					
						2015-06-29 07:09:16 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							fe7b24ecef 
							
						 
					 
					
						
						
							
							* whitespace  
						
						
						
					 
					
						2015-06-28 11:37:17 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							7b8275fcc4 
							
						 
					 
					
						
						
							
							* Wire hyperparameters to script interface  
						
						
						
					 
					
						2015-06-28 11:37:17 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							897dd0dd0b 
							
						 
					 
					
						
						
							
							* Merge changes, and adjust Example to use memoryview  
						
						
						
					 
					
						2015-06-28 11:36:11 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ef97b90833 
							
						 
					 
					
						
						
							
							* Fix token scoring  
						
						
						
					 
					
						2015-06-28 06:22:18 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							34c0ef2ee8 
							
						 
					 
					
						
						
							
							* Don't compile the orig_arc_eager and tree_arc_eager modules used for the EMNLP paper  
						
						
						
					 
					
						2015-06-23 05:38:17 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							59e9f9153c 
							
						 
					 
					
						
						
							
							* Remove projectivity constraint in train.py, but raise Exception if non-projective sentence is encountered, since we've told GoldParse to projectivize  
						
						
						
					 
					
						2015-06-23 05:04:46 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							839e5038b7 
							
						 
					 
					
						
						
							
							* Raise exception on non-projective input  
						
						
						
					 
					
						2015-06-23 00:01:55 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4dad4058c3 
							
						 
					 
					
						
						
							
							* Uncomment NER training  
						
						
						
					 
					
						2015-06-16 23:36:54 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5699585278 
							
						 
					 
					
						
						
							
							* Use tree_arc_eager system as baseline in experiments  
						
						
						
					 
					
						2015-06-15 08:23:43 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4841f8ad5e 
							
						 
					 
					
						
						
							
							* Set transition system early  
						
						
						
					 
					
						2015-06-15 02:54:12 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							bcfdf126a4 
							
						 
					 
					
						
						
							
							* Add toggle for OrigArcEager system  
						
						
						
					 
					
						2015-06-14 20:28:14 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c500d72dc2 
							
						 
					 
					
						
						
							
							* Temporarily disable NER, and wire up the verbose flag during training  
						
						
						
					 
					
						2015-06-14 17:45:31 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ac422492cf 
							
						 
					 
					
						
						
							
							* Fix write_parses mode of bin/parser/train.py  
						
						
						
					 
					
						2015-06-07 19:08:48 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4073533e28 
							
						 
					 
					
						
						
							
							* Upd munge_ewtb for the new json format  
						
						
						
					 
					
						2015-06-06 02:10:33 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6a1341b29e 
							
						 
					 
					
						
						
							
							* Add tb pre-process script  
						
						
						
					 
					
						2015-06-06 01:59:44 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							1736fc5a67 
							
						 
					 
					
						
						
							
							* Add more options to bin/parser/train  
						
						
						
					 
					
						2015-06-05 23:49:26 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							362f87dc3a 
							
						 
					 
					
						
						
							
							* Update input corruption method to work with lists as well as trings  
						
						
						
					 
					
						2015-06-05 19:33:32 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							0aed9c9a33 
							
						 
					 
					
						
						
							
							* Fix train.py  
						
						
						
					 
					
						2015-06-05 15:50:24 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8466600add 
							
						 
					 
					
						
						
							
							* Clean up train.py, removing unused tag jackknifing code  
						
						
						
					 
					
						2015-06-05 15:01:28 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e772b48dcd 
							
						 
					 
					
						
						
							
							* Skip sentences of length 1 in training  
						
						
						
					 
					
						2015-06-05 02:29:03 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e822df0867 
							
						 
					 
					
						
						
							
							* Fix bugs in new greedy/beam parser  
						
						
						
					 
					
						2015-06-02 02:01:33 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							70a7ad89ca 
							
						 
					 
					
						
						
							
							* Removed unused imports from train.py  
						
						
						
					 
					
						2015-06-02 00:59:09 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a3de20118e 
							
						 
					 
					
						
						
							
							* Wire up beam-width command line argument  
						
						
						
					 
					
						2015-06-02 00:54:12 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							08044ea70c 
							
						 
					 
					
						
						
							
							* Remove try/except around parser.train  
						
						
						
					 
					
						2015-05-31 15:21:56 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c8a553fe91 
							
						 
					 
					
						
						
							
							* Fix cluster initialization  
						
						
						
					 
					
						2015-05-31 15:21:28 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							d7cc2338e7 
							
						 
					 
					
						
						
							
							* Fix bug in train.py  
						
						
						
					 
					
						2015-05-31 06:49:06 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c037f80638 
							
						 
					 
					
						
						
							
							* Add case expansion to Brown clusters  
						
						
						
					 
					
						2015-05-31 05:50:50 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5ab0f233a1 
							
						 
					 
					
						
						
							
							* Ensure words in Brown clusters make it into the vocab, even if they're not in our probs list  
						
						
						
					 
					
						2015-05-31 05:46:16 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							d42dda0372 
							
						 
					 
					
						
						
							
							* Shuffle docs before doing jackknife partition --- otherwise we'll not get the right genre mixes...  
						
						
						
					 
					
						2015-05-31 01:25:02 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4d8d490547 
							
						 
					 
					
						
						
							
							* Exclude empty sentences in prepare_treebank  
						
						
						
					 
					
						2015-05-31 01:12:46 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							d512d20d81 
							
						 
					 
					
						
						
							
							* Allow parser to jackknife POS tags before training.  
						
						
						
					 
					
						2015-05-31 01:11:11 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6bbdcc5db5 
							
						 
					 
					
						
						
							
							* Fix gold_preproc flag in train.py  
						
						
						
					 
					
						2015-05-30 05:23:02 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							76300bbb1b 
							
						 
					 
					
						
						
							
							* Use updated JSON format, with sentences below paragraphs. Allows use of gold preprocessing flag.  
						
						
						
					 
					
						2015-05-30 01:25:46 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2d11739f28 
							
						 
					 
					
						
						
							
							* Change data format of JSON corpus, putting sentences into lists with the paragraph  
						
						
						
					 
					
						2015-05-30 01:25:00 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							784e577f45 
							
						 
					 
					
						
						
							
							* Check NER length matches conll length in prepare_treebank  
						
						
						
					 
					
						2015-05-29 03:54:06 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b76bbbd12c 
							
						 
					 
					
						
						
							
							* Read json files recursively from a directory, instead of requiring a single .json file  
						
						
						
					 
					
						2015-05-29 03:52:55 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ef67ef7a4c 
							
						 
					 
					
						
						
							
							* Recomment in training in train.py  
						
						
						
					 
					
						2015-05-28 22:40:26 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5eb64eeb11 
							
						 
					 
					
						
						
							
							* Print json treebank by genre, instead of by large file  
						
						
						
					 
					
						2015-05-28 22:40:01 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f42dc1f7d8 
							
						 
					 
					
						
						
							
							* Fix evaluate method in train.py, to use sentences which don't have raw text  
						
						
						
					 
					
						2015-05-28 16:30:23 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a7cee46fe9 
							
						 
					 
					
						
						
							
							* Update train.py, to support paragraphs where there's no raw_text  
						
						
						
					 
					
						2015-05-27 19:14:02 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ef1333cf89 
							
						 
					 
					
						
						
							
							* Have prepare_treebank read train/dev/test IDs.  
						
						
						
					 
					
						2015-05-27 17:35:05 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e140e03516 
							
						 
					 
					
						
						
							
							* Read in OntoNotes. Doesn't support train/test/dev split yet  
						
						
						
					 
					
						2015-05-27 17:04:29 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							895060e774 
							
						 
					 
					
						
						
							
							* Ensure tagger and NER are trained, even if non-projective problem  
						
						
						
					 
					
						2015-05-27 03:16:21 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							32ae2cdabe 
							
						 
					 
					
						
						
							
							* In prepare_treebank, move ner into the token descriptions  
						
						
						
					 
					
						2015-05-26 19:52:39 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							61885aee76 
							
						 
					 
					
						
						
							
							* Work on prepare_treebank script, adding NER to it  
						
						
						
					 
					
						2015-05-26 19:28:29 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							15bbbf4901 
							
						 
					 
					
						
						
							
							* Remove cruft from train.py  
						
						
						
					 
					
						2015-05-25 07:54:10 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							fc75210941 
							
						 
					 
					
						
						
							
							* Move spacy.syntax.conll to spacy.gold  
						
						
						
					 
					
						2015-05-24 21:35:02 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							541c62c126 
							
						 
					 
					
						
						
							
							* Remove import of removed read_docparse_file function  
						
						
						
					 
					
						2015-05-24 20:05:13 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							bfeb29ebd1 
							
						 
					 
					
						
						
							
							* Tmp commit  
						
						
						
					 
					
						2015-05-24 02:50:14 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							983d954ef4 
							
						 
					 
					
						
						
							
							* Tmp commit, while switch to new format that assumes alignment happens during training  
						
						
						
					 
					
						2015-05-23 17:39:04 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f35503018e 
							
						 
					 
					
						
						
							
							* Tmp commit of train, while I move to better alignment in gold standard  
						
						
						
					 
					
						2015-05-23 17:21:25 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3d6b3fc6fb 
							
						 
					 
					
						
						
							
							* Restore shuffling, and remove print statements from train.py  
						
						
						
					 
					
						2015-05-12 20:27:56 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e167355505 
							
						 
					 
					
						
						
							
							* Use JSON docs for training and evaluation. Currently a bug that is costing 0.6 acc  
						
						
						
					 
					
						2015-05-12 20:27:56 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e0ef6b6992 
							
						 
					 
					
						
						
							
							* Fix alignment in prepare_treebank  
						
						
						
					 
					
						2015-05-12 20:27:56 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							0ad72a77ce 
							
						 
					 
					
						
						
							
							* Write JSON files, with both dependency and PSG parses  
						
						
						
					 
					
						2015-05-12 20:27:55 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5078a32213 
							
						 
					 
					
						
						
							
							* Work on script to format training data as a JSON file.  
						
						
						
					 
					
						2015-05-12 20:27:55 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							fb8d50b3d5 
							
						 
					 
					
						
						
							
							Merge branch 'master' of ssh://github.com/honnibal/spaCy  
						
						
						
					 
					
						2015-04-30 12:45:15 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4489d87550 
							
						 
					 
					
						
						
							
							* Add cluster=0 by default in init_model  
						
						
						
					 
					
						2015-04-29 14:23:13 +02:00 
						 
				 
			
				
					
						
							
							
								Jordan Suchow 
							
						 
					 
					
						
						
						
						
							
						
						
							3a8d9b37a6 
							
						 
					 
					
						
						
							
							Remove trailing whitespace  
						
						
						
					 
					
						2015-04-19 13:01:38 -07:00 
						 
				 
			
				
					
						
							
							
								Jordan Suchow 
							
						 
					 
					
						
						
						
						
							
						
						
							5f0f940a1f 
							
						 
					 
					
						
						
							
							Remove unused imports  
						
						
						
					 
					
						2015-04-19 01:05:22 -07:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							693c5a1558 
							
						 
					 
					
						
						
							
							* Exclude clusterings for words only seen 1 or 2 times, as their clusters are unreliable  
						
						
						
					 
					
						2015-04-17 04:44:52 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							adcad4f353 
							
						 
					 
					
						
						
							
							* Clean up train.py  
						
						
						
					 
					
						2015-04-15 06:02:04 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							1629b33082 
							
						 
					 
					
						
						
							
							* Fix copying of tokenizer data in init_model  
						
						
						
					 
					
						2015-04-12 04:45:31 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a6ac92f077 
							
						 
					 
					
						
						
							
							* Respect the model_dir input parameter to train.py  
						
						
						
					 
					
						2015-04-08 22:48:26 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							baff0f8ad8 
							
						 
					 
					
						
						
							
							* Add docstring explaining script a bit, and add handling of word vectors  
						
						
						
					 
					
						2015-04-08 08:20:15 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							156b70ed82 
							
						 
					 
					
						
						
							
							* Add new script to replace make_lexicon, that does full setup of data  
						
						
						
					 
					
						2015-04-08 07:46:53 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e775e05313 
							
						 
					 
					
						
						
							
							* Use merge_mwe=False in evaluation in train.py  
						
						
						
					 
					
						2015-04-08 00:35:19 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f26f381b0e 
							
						 
					 
					
						
						
							
							* Add simple ner_tag script  
						
						
						
					 
					
						2015-04-03 17:26:58 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							bb27979352 
							
						 
					 
					
						
						
							
							* Add prepare_vecs script  
						
						
						
					 
					
						2015-04-02 06:19:39 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							cd054c6c9f 
							
						 
					 
					
						
						
							
							* Remove stray print statement  
						
						
						
					 
					
						2015-03-27 15:20:42 +01:00