ines 
							
						 
					 
					
						
						
						
						
							
						
						
							8bc05c2ba9 
							
						 
					 
					
						
						
							
							Delete old training scripts ( resolves   #911 )  
						
						
						
					 
					
						2017-03-23 11:07:59 +01:00 
						 
				 
			
				
					
						
							
							
								Raphaël Bournhonesque 
							
						 
					 
					
						
						
						
						
							
						
						
							08346dba1a 
							
						 
					 
					
						
						
							
							Use specific language class instead of base Language class  
						
						
						
					 
					
						2017-03-21 23:18:54 +01:00 
						 
				 
			
				
					
						
							
							
								Raphaël Bournhonesque 
							
						 
					 
					
						
						
						
						
							
						
						
							7568cd6bf8 
							
						 
					 
					
						
						
							
							Split CONLLX file using tabs and not default split separators  
						
						
						
					 
					
						2017-03-21 23:00:13 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ef6bd08e6c 
							
						 
					 
					
						
						
							
							Update train_ud for Universal Dependencies 2  
						
						
						
					 
					
						2017-03-16 17:08:15 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a155482fda 
							
						 
					 
					
						
						
							
							Improve printing in train_ud script  
						
						
						
					 
					
						2017-03-11 11:11:05 -06:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							35124b144a 
							
						 
					 
					
						
						
							
							Add L1 penalty option to parser  
						
						
						
					 
					
						2017-03-09 18:44:53 -06:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							04a51dab62 
							
						 
					 
					
						
						
							
							Print active parser features during training  
						
						
						
					 
					
						2017-03-08 01:37:19 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c744ce4b6d 
							
						 
					 
					
						
						
							
							Fix bad change to cythonize.py script, re subprocess call  
						
						
						
					 
					
						2017-02-16 19:01:25 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							071d11cb35 
							
						 
					 
					
						
						
							
							Pass environment to Cythonize script.  Closes   #791  
						
						
						
					 
					
						2017-02-17 01:04:16 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4ff92184f1 
							
						 
					 
					
						
						
							
							Improve train_ud script  
						
						
						
					 
					
						2017-01-09 09:53:46 -06:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c1ef07788c 
							
						 
					 
					
						
						
							
							Update train_ud.py  
						
						... 
						
						
						
						Create deps folder if it doesn't exist. 
						
					 
					
						2017-01-09 10:55:44 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							46e98ec029 
							
						 
					 
					
						
						
							
							Move init_model.py script from repo. These meta-tools should live elsewhere  
						
						
						
					 
					
						2016-12-18 14:03:40 +01:00 
						 
				 
			
				
					
						
							
							
								dafnevk 
							
						 
					 
					
						
						
						
						
							
						
						
							cdf5dcc40a 
							
						 
					 
					
						
						
							
							fixed bug in init_model so that it runs for dutch  
						
						
						
					 
					
						2016-12-13 14:33:44 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c7889492f9 
							
						 
					 
					
						
						
							
							Fix model saving error for Python 3  
						
						
						
					 
					
						2016-11-25 18:04:30 -06:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							22189e60db 
							
						 
					 
					
						
						
							
							Use unicode literals in train_ud  
						
						
						
					 
					
						2016-11-25 17:45:45 -06:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							da5f0cce36 
							
						 
					 
					
						
						
							
							Fix train_ud script, which trains models from the Universal Dependencies format.  
						
						
						
					 
					
						2016-11-25 11:19:33 -06:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							314bc8d34f 
							
						 
					 
					
						
						
							
							Fix train script for 1.0  
						
						
						
					 
					
						2016-11-25 08:57:37 -06:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							bd1bfcca61 
							
						 
					 
					
						
						
							
							Update train.py  
						
						
						
					 
					
						2016-10-13 03:23:48 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ea23b64cc8 
							
						 
					 
					
						
						
							
							Refactor training, with new spacy.train module. Defaults still a little awkward.  
						
						
						
					 
					
						2016-10-09 12:24:24 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							53fbd3dd1c 
							
						 
					 
					
						
						
							
							Fix train.py for v1.0.0-rc1  
						
						
						
					 
					
						2016-10-05 01:11:46 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ae202e7a60 
							
						 
					 
					
						
						
							
							Fix init_model.py  
						
						
						
					 
					
						2016-09-25 15:58:51 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							af847e07fc 
							
						 
					 
					
						
						
							
							Fix usage of pathlib for Python3 -- turning paths to strings.  
						
						
						
					 
					
						2016-09-24 21:05:27 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							d310dc73ef 
							
						 
					 
					
						
						
							
							Fix bin/init_model.py after refactoring  
						
						
						
					 
					
						2016-09-24 20:38:18 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8036368d96 
							
						 
					 
					
						
						
							
							* Fix model saving  
						
						
						
					 
					
						2016-05-23 12:01:46 +00:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							35214053fd 
							
						 
					 
					
						
						
							
							* Work around get_lex_attr bug introduced during German parsing  
						
						
						
					 
					
						2016-05-23 10:53:00 +00:00 
						 
				 
			
				
					
						
							
							
								Wolfgang Seeker 
							
						 
					 
					
						
						
						
						
							
						
						
							dae6bc05eb 
							
						 
					 
					
						
						
							
							define German dummy lemmatizer until morphology is done  
						
						
						
					 
					
						2016-05-02 16:04:53 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8569dbc2d0 
							
						 
					 
					
						
						
							
							* Add initial stuff for Chinese parsing  
						
						
						
					 
					
						2016-04-24 18:44:24 +02:00 
						 
				 
			
				
					
						
							
							
								Wolfgang Seeker 
							
						 
					 
					
						
						
						
						
							
						
						
							f9150ccf2a 
							
						 
					 
					
						
						
							
							rename vectors.tgz to vectors.bz2 because it's not compressed with gzip but bzip  
						
						
						
					 
					
						2016-04-08 13:38:07 +02:00 
						 
				 
			
				
					
						
							
							
								Wolfgang Seeker 
							
						 
					 
					
						
						
						
						
							
						
						
							a8f4e49900 
							
						 
					 
					
						
						
							
							update init_model.py to previous (better) state  
						
						
						
					 
					
						2016-03-29 16:12:13 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							d249e2f7f3 
							
						 
					 
					
						
						
							
							* Improve error message in bin/parser/train.py  
						
						
						
					 
					
						2016-03-29 13:04:33 +11:00 
						 
				 
			
				
					
						
							
							
								Yaser Martinez Palenzuela 
							
						 
					 
					
						
						
						
						
							
						
						
							3c210f45fa 
							
						 
					 
					
						
						
							
							make use of log_smooth_count  
						
						
						
					 
					
						2016-03-17 12:19:52 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							fcaa0ad7ce 
							
						 
					 
					
						
						
							
							Merge pull request  #280  from wbwseeker/german_parser  
						
						... 
						
						
						
						German parser 
						
					 
					
						2016-03-04 03:27:42 +11:00 
						 
				 
			
				
					
						
							
							
								Wolfgang Seeker 
							
						 
					 
					
						
						
						
						
							
						
						
							690c5acabf 
							
						 
					 
					
						
						
							
							adjust train.py to train both english and german models  
						
						
						
					 
					
						2016-03-03 15:21:00 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9d51e4d13c 
							
						 
					 
					
						
						
							
							Delete gather_freqs.py  
						
						... 
						
						
						
						This script was in a broken state, and should be unnecessary. The functionality is subsumed by `get_freqs.py` 
						
					 
					
						2016-03-02 00:42:55 +11:00 
						 
				 
			
				
					
						
							
							
								Yaser Martinez Palenzuela 
							
						 
					 
					
						
						
						
						
							
						
						
							1a93d7f725 
							
						 
					 
					
						
						
							
							replace codecs.open with io.open  
						
						
						
					 
					
						2016-03-01 14:10:11 +01:00 
						 
				 
			
				
					
						
							
							
								Wolfgang Seeker 
							
						 
					 
					
						
						
						
						
							
						
						
							eae35e9b27 
							
						 
					 
					
						
						
							
							add tokenizer files for German, add/change code to train German pos tagger  
						
						... 
						
						
						
						- add files to specify rules for German tokenization
- change generate_specials.py to generate from an external file (abbrev.de.tab)
- copy gazetteer.json from lang_data/en/
- init_model.py
	- change doc freq threshold to 0
- add train_german_tagger.py
	- expects conll09-formatted input 
						
					 
					
						2016-02-18 13:24:20 +01:00 
						 
				 
			
				
					
						
							
							
								Henning Peters 
							
						 
					 
					
						
						
						
						
							
						
						
							a89ca6537b 
							
						 
					 
					
						
						
							
							fix cythonize  
						
						
						
					 
					
						2016-02-05 16:17:23 +01:00 
						 
				 
			
				
					
						
							
							
								Henning Peters 
							
						 
					 
					
						
						
						
						
							
						
						
							3a50448bf3 
							
						 
					 
					
						
						
							
							py3 compatibility  
						
						
						
					 
					
						2016-02-05 15:43:50 +01:00 
						 
				 
			
				
					
						
							
							
								Henning Peters 
							
						 
					 
					
						
						
						
						
							
						
						
							7627969aba 
							
						 
					 
					
						
						
							
							refactor, listen on setup.py, *.pxd  
						
						
						
					 
					
						2016-02-05 15:37:00 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5dc6cffc67 
							
						 
					 
					
						
						
							
							* Fix gather_freqs.py  
						
						
						
					 
					
						2016-02-04 20:21:58 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e2ed6251d7 
							
						 
					 
					
						
						
							
							* Fancy up the CLI for the conll train script  
						
						
						
					 
					
						2016-02-02 22:58:06 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a676d66807 
							
						 
					 
					
						
						
							
							* Update the CoNLL train script, to get working on other languages  
						
						
						
					 
					
						2016-02-02 22:29:34 +01:00 
						 
				 
			
				
					
						
							
							
								Henning Peters 
							
						 
					 
					
						
						
						
						
							
						
						
							73674a4afb 
							
						 
					 
					
						
						
							
							try using system-wide headers  
						
						
						
					 
					
						2015-12-13 12:51:23 +01:00 
						 
				 
			
				
					
						
							
							
								Henning Peters 
							
						 
					 
					
						
						
						
						
							
						
						
							92fabd0114 
							
						 
					 
					
						
						
							
							wrap virtualenv around cythonize  
						
						
						
					 
					
						2015-12-13 12:32:22 +01:00 
						 
				 
			
				
					
						
							
							
								Henning Peters 
							
						 
					 
					
						
						
						
						
							
						
						
							9662cf04c9 
							
						 
					 
					
						
						
							
							new approach to dependency headers  
						
						
						
					 
					
						2015-12-13 11:53:02 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6e68b344c1 
							
						 
					 
					
						
						
							
							* Train after parsing, not before.  
						
						
						
					 
					
						2015-11-12 04:43:52 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4fb038a9eb 
							
						 
					 
					
						
						
							
							* Update conll_train.py script for spaCy v0.97  
						
						
						
					 
					
						2015-10-31 00:53:51 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							cfaa4bde5d 
							
						 
					 
					
						
						
							
							* Add train and parse scripts that use CoNLL formatted data  
						
						
						
					 
					
						2015-10-30 12:54:49 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2348a08481 
							
						 
					 
					
						
						
							
							* Load/dump strings with a json file, instead of the hacky strings file we were using.  
						
						
						
					 
					
						2015-10-22 21:13:03 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							0ce12e4548 
							
						 
					 
					
						
						
							
							* Import io in get_freqs  
						
						
						
					 
					
						2015-10-19 12:56:18 +11:00