Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							7a33f1e2b7 
							
						 
					 
					
						
						
							
							Add dep to supertag.  
						
						
						
					 
					
						2017-05-08 07:50:01 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							66252f3e71 
							
						 
					 
					
						
						
							
							Change vector width  
						
						
						
					 
					
						2017-05-08 14:47:11 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2e2268a442 
							
						 
					 
					
						
						
							
							Precomputable hidden now working  
						
						
						
					 
					
						2017-05-08 11:36:37 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							10682d35ab 
							
						 
					 
					
						
						
							
							Get pre-computed version working  
						
						
						
					 
					
						2017-05-08 00:38:35 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6782eedf9b 
							
						 
					 
					
						
						
							
							Tmp GPU code  
						
						
						
					 
					
						2017-05-07 11:04:24 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e420e5a809 
							
						 
					 
					
						
						
							
							Tmp  
						
						
						
					 
					
						2017-05-07 07:31:09 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f99f5b75dc 
							
						 
					 
					
						
						
							
							working residual net  
						
						
						
					 
					
						2017-05-07 03:57:26 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							bdf2dba9fb 
							
						 
					 
					
						
						
							
							WIP on refactor, with hidde pre-computing  
						
						
						
					 
					
						2017-05-07 02:02:43 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b439e04f8d 
							
						 
					 
					
						
						
							
							Learning smoothly  
						
						
						
					 
					
						2017-05-06 20:38:12 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							04ae1c01f1 
							
						 
					 
					
						
						
							
							Learns things  
						
						
						
					 
					
						2017-05-06 18:21:02 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							bcf4cd0a5f 
							
						 
					 
					
						
						
							
							Learns things  
						
						
						
					 
					
						2017-05-06 17:37:36 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8e48b58cd6 
							
						 
					 
					
						
						
							
							Gradients look correct  
						
						
						
					 
					
						2017-05-06 16:47:15 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							8bc05c2ba9 
							
						 
					 
					
						
						
							
							Delete old training scripts ( resolves   #911 )  
						
						
						
					 
					
						2017-03-23 11:07:59 +01:00 
						 
				 
			
				
					
						
							
							
								Raphaël Bournhonesque 
							
						 
					 
					
						
						
						
						
							
						
						
							08346dba1a 
							
						 
					 
					
						
						
							
							Use specific language class instead of base Language class  
						
						
						
					 
					
						2017-03-21 23:18:54 +01:00 
						 
				 
			
				
					
						
							
							
								Raphaël Bournhonesque 
							
						 
					 
					
						
						
						
						
							
						
						
							7568cd6bf8 
							
						 
					 
					
						
						
							
							Split CONLLX file using tabs and not default split separators  
						
						
						
					 
					
						2017-03-21 23:00:13 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ef6bd08e6c 
							
						 
					 
					
						
						
							
							Update train_ud for Universal Dependencies 2  
						
						
						
					 
					
						2017-03-16 17:08:15 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a155482fda 
							
						 
					 
					
						
						
							
							Improve printing in train_ud script  
						
						
						
					 
					
						2017-03-11 11:11:05 -06:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							35124b144a 
							
						 
					 
					
						
						
							
							Add L1 penalty option to parser  
						
						
						
					 
					
						2017-03-09 18:44:53 -06:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							04a51dab62 
							
						 
					 
					
						
						
							
							Print active parser features during training  
						
						
						
					 
					
						2017-03-08 01:37:19 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c744ce4b6d 
							
						 
					 
					
						
						
							
							Fix bad change to cythonize.py script, re subprocess call  
						
						
						
					 
					
						2017-02-16 19:01:25 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							071d11cb35 
							
						 
					 
					
						
						
							
							Pass environment to Cythonize script.  Closes   #791  
						
						
						
					 
					
						2017-02-17 01:04:16 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4ff92184f1 
							
						 
					 
					
						
						
							
							Improve train_ud script  
						
						
						
					 
					
						2017-01-09 09:53:46 -06:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c1ef07788c 
							
						 
					 
					
						
						
							
							Update train_ud.py  
						
						... 
						
						
						
						Create deps folder if it doesn't exist. 
						
					 
					
						2017-01-09 10:55:44 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							46e98ec029 
							
						 
					 
					
						
						
							
							Move init_model.py script from repo. These meta-tools should live elsewhere  
						
						
						
					 
					
						2016-12-18 14:03:40 +01:00 
						 
				 
			
				
					
						
							
							
								dafnevk 
							
						 
					 
					
						
						
						
						
							
						
						
							cdf5dcc40a 
							
						 
					 
					
						
						
							
							fixed bug in init_model so that it runs for dutch  
						
						
						
					 
					
						2016-12-13 14:33:44 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c7889492f9 
							
						 
					 
					
						
						
							
							Fix model saving error for Python 3  
						
						
						
					 
					
						2016-11-25 18:04:30 -06:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							22189e60db 
							
						 
					 
					
						
						
							
							Use unicode literals in train_ud  
						
						
						
					 
					
						2016-11-25 17:45:45 -06:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							da5f0cce36 
							
						 
					 
					
						
						
							
							Fix train_ud script, which trains models from the Universal Dependencies format.  
						
						
						
					 
					
						2016-11-25 11:19:33 -06:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							314bc8d34f 
							
						 
					 
					
						
						
							
							Fix train script for 1.0  
						
						
						
					 
					
						2016-11-25 08:57:37 -06:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							bd1bfcca61 
							
						 
					 
					
						
						
							
							Update train.py  
						
						
						
					 
					
						2016-10-13 03:23:48 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ea23b64cc8 
							
						 
					 
					
						
						
							
							Refactor training, with new spacy.train module. Defaults still a little awkward.  
						
						
						
					 
					
						2016-10-09 12:24:24 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							53fbd3dd1c 
							
						 
					 
					
						
						
							
							Fix train.py for v1.0.0-rc1  
						
						
						
					 
					
						2016-10-05 01:11:46 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ae202e7a60 
							
						 
					 
					
						
						
							
							Fix init_model.py  
						
						
						
					 
					
						2016-09-25 15:58:51 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							af847e07fc 
							
						 
					 
					
						
						
							
							Fix usage of pathlib for Python3 -- turning paths to strings.  
						
						
						
					 
					
						2016-09-24 21:05:27 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							d310dc73ef 
							
						 
					 
					
						
						
							
							Fix bin/init_model.py after refactoring  
						
						
						
					 
					
						2016-09-24 20:38:18 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8036368d96 
							
						 
					 
					
						
						
							
							* Fix model saving  
						
						
						
					 
					
						2016-05-23 12:01:46 +00:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							35214053fd 
							
						 
					 
					
						
						
							
							* Work around get_lex_attr bug introduced during German parsing  
						
						
						
					 
					
						2016-05-23 10:53:00 +00:00 
						 
				 
			
				
					
						
							
							
								Wolfgang Seeker 
							
						 
					 
					
						
						
						
						
							
						
						
							dae6bc05eb 
							
						 
					 
					
						
						
							
							define German dummy lemmatizer until morphology is done  
						
						
						
					 
					
						2016-05-02 16:04:53 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8569dbc2d0 
							
						 
					 
					
						
						
							
							* Add initial stuff for Chinese parsing  
						
						
						
					 
					
						2016-04-24 18:44:24 +02:00 
						 
				 
			
				
					
						
							
							
								Wolfgang Seeker 
							
						 
					 
					
						
						
						
						
							
						
						
							f9150ccf2a 
							
						 
					 
					
						
						
							
							rename vectors.tgz to vectors.bz2 because it's not compressed with gzip but bzip  
						
						
						
					 
					
						2016-04-08 13:38:07 +02:00 
						 
				 
			
				
					
						
							
							
								Wolfgang Seeker 
							
						 
					 
					
						
						
						
						
							
						
						
							a8f4e49900 
							
						 
					 
					
						
						
							
							update init_model.py to previous (better) state  
						
						
						
					 
					
						2016-03-29 16:12:13 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							d249e2f7f3 
							
						 
					 
					
						
						
							
							* Improve error message in bin/parser/train.py  
						
						
						
					 
					
						2016-03-29 13:04:33 +11:00 
						 
				 
			
				
					
						
							
							
								Yaser Martinez Palenzuela 
							
						 
					 
					
						
						
						
						
							
						
						
							3c210f45fa 
							
						 
					 
					
						
						
							
							make use of log_smooth_count  
						
						
						
					 
					
						2016-03-17 12:19:52 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							fcaa0ad7ce 
							
						 
					 
					
						
						
							
							Merge pull request  #280  from wbwseeker/german_parser  
						
						... 
						
						
						
						German parser 
						
					 
					
						2016-03-04 03:27:42 +11:00 
						 
				 
			
				
					
						
							
							
								Wolfgang Seeker 
							
						 
					 
					
						
						
						
						
							
						
						
							690c5acabf 
							
						 
					 
					
						
						
							
							adjust train.py to train both english and german models  
						
						
						
					 
					
						2016-03-03 15:21:00 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9d51e4d13c 
							
						 
					 
					
						
						
							
							Delete gather_freqs.py  
						
						... 
						
						
						
						This script was in a broken state, and should be unnecessary. The functionality is subsumed by `get_freqs.py` 
						
					 
					
						2016-03-02 00:42:55 +11:00 
						 
				 
			
				
					
						
							
							
								Yaser Martinez Palenzuela 
							
						 
					 
					
						
						
						
						
							
						
						
							1a93d7f725 
							
						 
					 
					
						
						
							
							replace codecs.open with io.open  
						
						
						
					 
					
						2016-03-01 14:10:11 +01:00 
						 
				 
			
				
					
						
							
							
								Wolfgang Seeker 
							
						 
					 
					
						
						
						
						
							
						
						
							eae35e9b27 
							
						 
					 
					
						
						
							
							add tokenizer files for German, add/change code to train German pos tagger  
						
						... 
						
						
						
						- add files to specify rules for German tokenization
- change generate_specials.py to generate from an external file (abbrev.de.tab)
- copy gazetteer.json from lang_data/en/
- init_model.py
	- change doc freq threshold to 0
- add train_german_tagger.py
	- expects conll09-formatted input 
						
					 
					
						2016-02-18 13:24:20 +01:00 
						 
				 
			
				
					
						
							
							
								Henning Peters 
							
						 
					 
					
						
						
						
						
							
						
						
							a89ca6537b 
							
						 
					 
					
						
						
							
							fix cythonize  
						
						
						
					 
					
						2016-02-05 16:17:23 +01:00 
						 
				 
			
				
					
						
							
							
								Henning Peters 
							
						 
					 
					
						
						
						
						
							
						
						
							3a50448bf3 
							
						 
					 
					
						
						
							
							py3 compatibility  
						
						
						
					 
					
						2016-02-05 15:43:50 +01:00 
						 
				 
			
				
					
						
							
							
								Henning Peters 
							
						 
					 
					
						
						
						
						
							
						
						
							7627969aba 
							
						 
					 
					
						
						
							
							refactor, listen on setup.py, *.pxd  
						
						
						
					 
					
						2016-02-05 15:37:00 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5dc6cffc67 
							
						 
					 
					
						
						
							
							* Fix gather_freqs.py  
						
						
						
					 
					
						2016-02-04 20:21:58 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e2ed6251d7 
							
						 
					 
					
						
						
							
							* Fancy up the CLI for the conll train script  
						
						
						
					 
					
						2016-02-02 22:58:06 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a676d66807 
							
						 
					 
					
						
						
							
							* Update the CoNLL train script, to get working on other languages  
						
						
						
					 
					
						2016-02-02 22:29:34 +01:00 
						 
				 
			
				
					
						
							
							
								Henning Peters 
							
						 
					 
					
						
						
						
						
							
						
						
							73674a4afb 
							
						 
					 
					
						
						
							
							try using system-wide headers  
						
						
						
					 
					
						2015-12-13 12:51:23 +01:00 
						 
				 
			
				
					
						
							
							
								Henning Peters 
							
						 
					 
					
						
						
						
						
							
						
						
							92fabd0114 
							
						 
					 
					
						
						
							
							wrap virtualenv around cythonize  
						
						
						
					 
					
						2015-12-13 12:32:22 +01:00 
						 
				 
			
				
					
						
							
							
								Henning Peters 
							
						 
					 
					
						
						
						
						
							
						
						
							9662cf04c9 
							
						 
					 
					
						
						
							
							new approach to dependency headers  
						
						
						
					 
					
						2015-12-13 11:53:02 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6e68b344c1 
							
						 
					 
					
						
						
							
							* Train after parsing, not before.  
						
						
						
					 
					
						2015-11-12 04:43:52 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4fb038a9eb 
							
						 
					 
					
						
						
							
							* Update conll_train.py script for spaCy v0.97  
						
						
						
					 
					
						2015-10-31 00:53:51 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							cfaa4bde5d 
							
						 
					 
					
						
						
							
							* Add train and parse scripts that use CoNLL formatted data  
						
						
						
					 
					
						2015-10-30 12:54:49 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2348a08481 
							
						 
					 
					
						
						
							
							* Load/dump strings with a json file, instead of the hacky strings file we were using.  
						
						
						
					 
					
						2015-10-22 21:13:03 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							0ce12e4548 
							
						 
					 
					
						
						
							
							* Import io in get_freqs  
						
						
						
					 
					
						2015-10-19 12:56:18 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							17fffb4c57 
							
						 
					 
					
						
						
							
							* Update get_freqs.py script  
						
						
						
					 
					
						2015-10-16 04:33:49 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5ff4454177 
							
						 
					 
					
						
						
							
							* Update get_freqs.py script  
						
						
						
					 
					
						2015-10-16 04:31:15 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a748146dd3 
							
						 
					 
					
						
						
							
							* Update get_freqs.py script  
						
						
						
					 
					
						2015-10-16 04:24:50 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a29fd79fbc 
							
						 
					 
					
						
						
							
							* Update get_freqs.py script  
						
						
						
					 
					
						2015-10-16 04:24:08 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e08a4b46a2 
							
						 
					 
					
						
						
							
							* Update get_freqs.py script  
						
						
						
					 
					
						2015-10-16 04:20:35 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							92f750cf8b 
							
						 
					 
					
						
						
							
							* Use a gzipped frequencies file in init_model  
						
						
						
					 
					
						2015-10-11 06:59:44 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							064bd69ad0 
							
						 
					 
					
						
						
							
							* Refactor symbols, so that frequency rank can be derived from the orth id of a word.  
						
						
						
					 
					
						2015-10-10 16:03:48 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							83dccf0fd7 
							
						 
					 
					
						
						
							
							* Use io module insteads of deprecated codecs module  
						
						
						
					 
					
						2015-10-10 14:13:01 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f35632e2e5 
							
						 
					 
					
						
						
							
							* Remove SBD print statement in train, after SBD evaluation was removed from Scorer  
						
						
						
					 
					
						2015-10-09 11:08:58 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6ea1601e93 
							
						 
					 
					
						
						
							
							* Add script to train models off the UD treebanks. Note that the UD data is restricted to research purposes only, and should only be used to train models for academic experiments.  
						
						
						
					 
					
						2015-10-08 12:01:08 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c503654ec1 
							
						 
					 
					
						
						
							
							* Update bin/parser/train for printing output.  
						
						
						
					 
					
						2015-10-06 10:35:22 +11:00 
						 
				 
			
				
					
						
							
							
								alvations 
							
						 
					 
					
						
						
						
						
							
						
						
							8caedba42a 
							
						 
					 
					
						
						
							
							caught more codecs.open -> io.open  
						
						
						
					 
					
						2015-09-30 20:20:09 +02:00 
						 
				 
			
				
					
						
							
							
								alvations 
							
						 
					 
					
						
						
						
						
							
						
						
							764bdc62e7 
							
						 
					 
					
						
						
							
							caught another codecs.open  
						
						
						
					 
					
						2015-09-30 20:16:52 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							1ae55cb63a 
							
						 
					 
					
						
						
							
							* Copy tag_map.json in init_model  
						
						
						
					 
					
						2015-09-12 05:54:02 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b2e82e55f6 
							
						 
					 
					
						
						
							
							* Create POS model dir in training script  
						
						
						
					 
					
						2015-09-08 15:36:23 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5ad4527c42 
							
						 
					 
					
						
						
							
							* Rename Deutsch to German  
						
						
						
					 
					
						2015-09-06 20:18:58 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							d1eea2d865 
							
						 
					 
					
						
						
							
							* Update train.py for language-generic spaCy  
						
						
						
					 
					
						2015-09-06 17:51:48 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							950ce36660 
							
						 
					 
					
						
						
							
							* Update init model  
						
						
						
					 
					
						2015-09-06 17:51:30 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b6b1e1aa12 
							
						 
					 
					
						
						
							
							* Add link for Finnish model  
						
						
						
					 
					
						2015-08-27 10:26:02 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							320ced276a 
							
						 
					 
					
						
						
							
							* Add tagger training script  
						
						
						
					 
					
						2015-08-27 09:15:41 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							dc13edd7cb 
							
						 
					 
					
						
						
							
							* Refactor init_model to accomodate other languages  
						
						
						
					 
					
						2015-08-26 19:14:05 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							bbf07ac253 
							
						 
					 
					
						
						
							
							* Cut down init_model to work on more languages  
						
						
						
					 
					
						2015-08-24 01:05:20 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3ecacb9635 
							
						 
					 
					
						
						
							
							* Copy gazetteer file in init_model  
						
						
						
					 
					
						2015-08-06 16:07:23 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ddc1a5cfe5 
							
						 
					 
					
						
						
							
							* Fix training under python3  
						
						
						
					 
					
						2015-07-28 14:09:30 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							174ed1ad20 
							
						 
					 
					
						
						
							
							* Tighten the frequency filter in init_model  
						
						
						
					 
					
						2015-07-27 21:44:51 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6047f2aa35 
							
						 
					 
					
						
						
							
							* Fix path to freqs.txt  
						
						
						
					 
					
						2015-07-27 02:22:35 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							0368889d6c 
							
						 
					 
					
						
						
							
							* Support gzipped frequencies in init_model  
						
						
						
					 
					
						2015-07-26 22:39:22 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c4f20847da 
							
						 
					 
					
						
						
							
							* Fix init_model for travis tests  
						
						
						
					 
					
						2015-07-26 14:03:30 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							09312b9353 
							
						 
					 
					
						
						
							
							* Fix init_model for travis tests  
						
						
						
					 
					
						2015-07-26 13:55:47 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							90ad717dc4 
							
						 
					 
					
						
						
							
							* Update default freq thresholds in init_model  
						
						
						
					 
					
						2015-07-26 01:41:17 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6a5e035a48 
							
						 
					 
					
						
						
							
							* Ensure data files are copied for tokenizer in init_model  
						
						
						
					 
					
						2015-07-26 01:36:19 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ab93898ac6 
							
						 
					 
					
						
						
							
							* Make heuristics more explicit in init_model  
						
						
						
					 
					
						2015-07-26 00:22:19 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5c04dcd7c1 
							
						 
					 
					
						
						
							
							* Fix init_model  
						
						
						
					 
					
						2015-07-25 23:33:02 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							fd525f0675 
							
						 
					 
					
						
						
							
							* Pass OOV probability around  
						
						
						
					 
					
						2015-07-25 23:29:51 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5b6bf4d4a6 
							
						 
					 
					
						
						
							
							* Remove probability cap on lexicon  
						
						
						
					 
					
						2015-07-25 23:05:51 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c62eb110c0 
							
						 
					 
					
						
						
							
							* Fix merge conflict in init_model  
						
						
						
					 
					
						2015-07-25 23:04:30 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							0301472d15 
							
						 
					 
					
						
						
							
							* Fix init_model  
						
						
						
					 
					
						2015-07-25 22:56:35 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8e800adfbc 
							
						 
					 
					
						
						
							
							* Fix init_model  
						
						
						
					 
					
						2015-07-25 22:54:08 +02:00