Xiaoquan Kong 
							
						 
					 
					
						
						
						
						
							
						
						
							f0c9652ed1 
							
						 
					 
					
						
						
							
							New Feature: display more detail when Error E067 ( #2639 )  
						
						... 
						
						
						
						* Fix off-by-one error
* Add verbose option
* Update verbose option
* Update documents for verbose option 
						
					 
					
						2018-08-07 10:45:29 +02:00 
						 
				 
			
				
					
						
							
							
								Kaisa (Katarzyna) Korsak 
							
						 
					 
					
						
						
						
						
							
						
						
							e531a827db 
							
						 
					 
					
						
						
							
							Changed conllu2json to be able to extract NER tags ( #2594 )  
						
						... 
						
						
						
						* extract ner tags from conllu file if available
* fixed a bug in regex 
						
					 
					
						2018-07-25 22:21:31 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							d84b13e02c 
							
						 
					 
					
						
						
							
							Merge branch 'master' into develop  
						
						
						
					 
					
						2018-07-18 18:57:00 +02:00 
						 
				 
			
				
					
						
							
							
								Ole Henrik Skogstrøm 
							
						 
					 
					
						
						
						
						
							
						
						
							6e2930a4a2 
							
						 
					 
					
						
						
							
							Conll(u)-bio converter ( #2525 )  
						
						... 
						
						
						
						* Started simple conllxbiluo converter
* Fix missing BIO to BILUO conversion 
						
					 
					
						2018-07-18 18:55:42 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8ae1bec8bf 
							
						 
					 
					
						
						
							
							Fix init_model  
						
						
						
					 
					
						2018-07-05 14:02:06 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							dee8bdb900 
							
						 
					 
					
						
						
							
							Fix init-model for npz vectors  
						
						
						
					 
					
						2018-07-04 02:29:48 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							59d655e8d0 
							
						 
					 
					
						
						
							
							Fix model init from jsonl  
						
						
						
					 
					
						2018-07-04 01:30:40 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							1e38bea6e9 
							
						 
					 
					
						
						
							
							Save vectors init  
						
						
						
					 
					
						2018-07-03 23:55:04 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6692833887 
							
						 
					 
					
						
						
							
							Fix init_model  
						
						
						
					 
					
						2018-07-03 23:24:11 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4a38a26cb5 
							
						 
					 
					
						
						
							
							Fix init_model  
						
						
						
					 
					
						2018-07-03 22:57:11 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							019d09e3c3 
							
						 
					 
					
						
						
							
							Fix init model  
						
						
						
					 
					
						2018-07-03 22:16:44 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2543f8c93a 
							
						 
					 
					
						
						
							
							Support .npz vectors in init-model command  
						
						
						
					 
					
						2018-07-03 21:42:16 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							86aad11939 
							
						 
					 
					
						
						
							
							Fix init_model arg  
						
						
						
					 
					
						2018-07-03 17:00:42 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							eff42d36e3 
							
						 
					 
					
						
						
							
							Fix init model command  
						
						
						
					 
					
						2018-07-03 16:32:23 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6a89faf12e 
							
						 
					 
					
						
						
							
							Add support for jsonl-formatted lexical attributes to init-model command.  
						
						
						
					 
					
						2018-07-03 12:22:56 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c83fccfe2a 
							
						 
					 
					
						
						
							
							Fix output of best model  
						
						
						
					 
					
						2018-06-25 23:05:56 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							69c900f003 
							
						 
					 
					
						
						
							
							Fix init-model if no vectors provided  
						
						
						
					 
					
						2018-06-25 18:26:02 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							664f89327a 
							
						 
					 
					
						
						
							
							Fix init-model if no vectors provided  
						
						
						
					 
					
						2018-06-25 17:58:45 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c4698f5712 
							
						 
					 
					
						
						
							
							Don't collate model unless training succeeds  
						
						
						
					 
					
						2018-06-25 16:36:42 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							24dfbb8a28 
							
						 
					 
					
						
						
							
							Fix model collation  
						
						
						
					 
					
						2018-06-25 14:35:24 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							62237755a4 
							
						 
					 
					
						
						
							
							Import shutil  
						
						
						
					 
					
						2018-06-25 13:40:17 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a040fca99e 
							
						 
					 
					
						
						
							
							Import json into cli.train  
						
						
						
					 
					
						2018-06-25 11:50:37 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2c703d99c2 
							
						 
					 
					
						
						
							
							Fix collation of best models  
						
						
						
					 
					
						2018-06-25 01:21:34 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2c80b7c013 
							
						 
					 
					
						
						
							
							Collate best model after training  
						
						
						
					 
					
						2018-06-24 23:39:52 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							330c039106 
							
						 
					 
					
						
						
							
							Merge branch 'master' into develop  
						
						
						
					 
					
						2018-05-26 18:30:52 +02:00 
						 
				 
			
				
					
						
							
							
								James Messinger 
							
						 
					 
					
						
						
						
						
							
						
						
							4515e96e90 
							
						 
					 
					
						
						
							
							Better formatting for spacy train CLI ( #2357 )  
						
						... 
						
						
						
						* Better formatting for `spacy train` CLI
Changed to use fixed-spaces rather than tabs to align table headers and data.
### Before:
```
Itn.    P.Loss  N.Loss  UAS     NER P.  NER R.  NER F.  Tag %   Token %
0       4618.857        2910.004        76.172  79.645  67.987  88.732  88.261  100.000 4436.9  6376.4
1       4671.972        3764.812        74.481  78.046  62.374  82.680  88.377  100.000 4672.2  6227.1
2       4742.756        3673.473        71.994  77.380  63.966  84.494  90.620  100.000 4298.0  5983.9
```
### After:
```
Itn.  Dep Loss  NER Loss  UAS     NER P.  NER R.  NER F.  Tag %   Token %  CPU WPS  GPU WPS
0     4618.857  2910.004  76.172  79.645  67.987  88.732  88.261  100.000  4436.9   6376.4
1     4671.972  3764.812  74.481  78.046  62.374  82.680  88.377  100.000  4672.2   6227.1
2     4742.756  3673.473  71.994  77.380  63.966  84.494  90.620  100.000  4298.0   5983.9
```
* Added contributor file 
						
					 
					
						2018-05-25 13:08:45 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ce458c2428 
							
						 
					 
					
						
						
							
							Fix spacy requirement constraint in package template  
						
						
						
					 
					
						2018-05-22 20:50:46 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f3b4f6a4ec 
							
						 
					 
					
						
						
							
							Merge setup.py  
						
						
						
					 
					
						2018-05-20 23:21:00 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							d4cc736b7c 
							
						 
					 
					
						
						
							
							💫  Improve model downloads: check for existing install, customise pip and use requests library again ( #2346 )  
						
						... 
						
						
						
						* Go back to using requests instead of urllib (closes  #2320 )
Fewer dependencies are good, but this one was simply causing too many other problems around SSL verification and Python 2/3 compatibility. requests is a popular enough package that it's okay for spaCy to depend on it – and this will hopefully make model downloads less flakey.
* Only download model if not installed (see #1456 )
Use #egg=model==version to allow pip to check for existing installations. The download is only started if no installation matching the package/version is found. Fixes a long-standing inconvenience.
* Pass additional options to pip when installing model (resolves  #1456 )
Treat all additional arguments passed to the download command as pip options to allow user to customise the command. For example:
python -m spacy download en --user
* Add CLI option to enable installing model package dependencies
* Revert "Add CLI option to enable installing model package dependencies"
This reverts commit 9336ffe695 
						
					 
					
						2018-05-20 20:26:56 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							74d5c625b3 
							
						 
					 
					
						
						
							
							Use rising beam update prob  
						
						
						
					 
					
						2018-05-16 20:11:59 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							dc1a479fbd 
							
						 
					 
					
						
						
							
							Merge branch 'develop' into feature/refactor-parser  
						
						
						
					 
					
						2018-05-15 18:39:21 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							546dd99cdf 
							
						 
					 
					
						
						
							
							Merge master into develop -- mostly Arabic and website  
						
						
						
					 
					
						2018-05-15 18:14:28 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a6ae1ee6f7 
							
						 
					 
					
						
						
							
							Don't modify Token in global scope  
						
						
						
					 
					
						2018-05-09 00:43:00 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f94f721f40 
							
						 
					 
					
						
						
							
							Avoid importing fused token symbol in ud-run-test, until that's added  
						
						
						
					 
					
						2018-05-09 00:28:03 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							659ec5b975 
							
						 
					 
					
						
						
							
							Avoid importing fused token symbol in ud-run-test, until that's added  
						
						
						
					 
					
						2018-05-08 19:40:33 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							fc4dd49b77 
							
						 
					 
					
						
						
							
							Support oracle segmentation in ud-train CLI command  
						
						
						
					 
					
						2018-05-08 13:47:45 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							7a3599c21a 
							
						 
					 
					
						
						
							
							Fix formatting and consistency  
						
						
						
					 
					
						2018-05-07 23:02:11 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							eddc0e0c74 
							
						 
					 
					
						
						
							
							Set gold.sent_starts in ud_train  
						
						
						
					 
					
						2018-05-07 15:52:47 +02:00 
						 
				 
			
				
					
						
							
							
								G.Pruvost 
							
						 
					 
					
						
						
						
						
							
						
						
							cc8e804648 
							
						 
					 
					
						
						
							
							#2211  - Support for ssl certs config on download command ( #2212 )  
						
						... 
						
						
						
						* Add support for SSL/Certs customization on download CLI
* Add a note on SSL options for the 'download' CLI in the README
* Add contributor agreement 
						
					 
					
						2018-05-03 18:37:02 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							723b328062 
							
						 
					 
					
						
						
							
							Add script to run UD test  
						
						
						
					 
					
						2018-04-29 15:50:25 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							17af6aa3a4 
							
						 
					 
					
						
						
							
							Update ud_train script  
						
						
						
					 
					
						2018-04-29 15:49:32 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2c4a6d66fa 
							
						 
					 
					
						
						
							
							Merge master into develop. Big merge, many conflicts -- need to review  
						
						
						
					 
					
						2018-04-29 14:49:26 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							3c80f69ff5 
							
						 
					 
					
						
						
							
							Return data in cli.info and add silent option ( resolves   #2196 )  
						
						
						
					 
					
						2018-04-29 01:59:44 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							0299d5fac8 
							
						 
					 
					
						
						
							
							Update argument annotations and formatting  
						
						
						
					 
					
						2018-04-10 21:45:11 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							49b1e48bf5 
							
						 
					 
					
						
						
							
							Fix syntax error  
						
						
						
					 
					
						2018-04-10 21:44:59 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							70052e46e9 
							
						 
					 
					
						
						
							
							Fix formatting [ci skip]  
						
						
						
					 
					
						2018-04-10 21:42:46 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							0ddb152be0 
							
						 
					 
					
						
						
							
							Improve error message when reading vectors  
						
						
						
					 
					
						2018-04-10 21:26:50 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							db50ac524e 
							
						 
					 
					
						
						
							
							Support zipped vector files in init-model  
						
						
						
					 
					
						2018-04-10 21:21:00 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							270fcfd925 
							
						 
					 
					
						
						
							
							Fix typo in package command message ( closes   #2200 )  
						
						
						
					 
					
						2018-04-10 19:14:31 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							24d8bf348d 
							
						 
					 
					
						
						
							
							Revert "Add support for .zip to init_model"  
						
						... 
						
						
						
						This reverts commit 7ee880a0ad 
						
					 
					
						2018-04-10 19:08:06 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							7ee880a0ad 
							
						 
					 
					
						
						
							
							Add support for .zip to init_model  
						
						
						
					 
					
						2018-04-10 14:30:04 +00:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							3141e04822 
							
						 
					 
					
						
						
							
							💫  New system for error messages and warnings ( #2163 )  
						
						... 
						
						
						
						* Add spacy.errors module
* Update deprecation and user warnings
* Replace errors and asserts with new error message system
* Remove redundant asserts
* Fix whitespace
* Add messages for print/util.prints statements
* Fix typo
* Fix typos
* Move CLI messages to spacy.cli._messages
* Add decorator to display error code with message
An implementation like this is nice because it only modifies the string when it's retrieved from the containing class – so we don't have to worry about manipulating tracebacks etc.
* Remove unused link in spacy.about
* Update errors for invalid pipeline components
* Improve error for unknown factories
* Add displaCy warnings
* Update formatting consistency
* Move error message to spacy.errors
* Update errors and check if doc returned by component is None 
						
					 
					
						2018-04-03 15:50:31 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							a609a1ca29 
							
						 
					 
					
						
						
							
							Merge pull request  #2152  from explosion/feature/tidy-up-dependencies  
						
						... 
						
						
						
						💫  Tidy up dependencies 
					
						2018-03-29 14:35:09 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b5098079d8 
							
						 
					 
					
						
						
							
							Fix error on urllib  
						
						
						
					 
					
						2018-03-29 00:08:16 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							98e9cda677 
							
						 
					 
					
						
						
							
							Merge pull request  #2158  from explosion/feature/fix-multiple-vectors ( resolves   #1660 )  
						
						... 
						
						
						
						💫  Fix loading of multiple vector models 
					
						2018-03-28 23:08:24 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							17c3e7efa2 
							
						 
					 
					
						
						
							
							Add message noting vectors  
						
						
						
					 
					
						2018-03-28 16:33:43 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							7fbc9e5874 
							
						 
					 
					
						
						
							
							Replace requests with urllib  
						
						
						
					 
					
						2018-03-28 12:46:07 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							ac88c72c9a 
							
						 
					 
					
						
						
							
							Fix ftfy workaround and remove old import  
						
						
						
					 
					
						2018-03-28 12:14:28 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							070b6c6495 
							
						 
					 
					
						
						
							
							Remove dependency on ftfy  
						
						
						
					 
					
						2018-03-28 12:07:02 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b7136cb094 
							
						 
					 
					
						
						
							
							Support zipped vector files in init-model  
						
						
						
					 
					
						2018-03-27 21:01:18 +00:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							1f7229f40f 
							
						 
					 
					
						
						
							
							Revert "Merge branch 'develop' of  https://github.com/explosion/spaCy  into develop"  
						
						... 
						
						
						
						This reverts commit c9ba3d3c2d92c26a35d4 
						
					 
					
						2018-03-27 19:23:02 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f57bfbccdc 
							
						 
					 
					
						
						
							
							Fix non-projective label filtering  
						
						
						
					 
					
						2018-03-27 13:41:33 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8bbd26579c 
							
						 
					 
					
						
						
							
							Support GPU in UD training script  
						
						
						
					 
					
						2018-03-27 09:53:35 +00:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							406548b976 
							
						 
					 
					
						
						
							
							Support .gz and .tar.gz files in spacy init-model  
						
						
						
					 
					
						2018-03-24 17:18:32 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							85717f570c 
							
						 
					 
					
						
						
							
							Merge branch 'master' of  https://github.com/explosion/spaCy  
						
						
						
					 
					
						2018-03-23 20:30:42 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8902754f0b 
							
						 
					 
					
						
						
							
							Fix vector loading for ud_train  
						
						
						
					 
					
						2018-03-23 20:30:00 +01:00 
						 
				 
			
				
					
						
							
							
								Xiaoquan Kong 
							
						 
					 
					
						
						
						
						
							
						
						
							a71b99d7ff 
							
						 
					 
					
						
						
							
							bugfix for global-variable-change-in-runtime related issue ( #2135 )  
						
						... 
						
						
						
						* Bugfix: setting pollution from spacy/cli/ud_train.py to whole package
* Add contributor agreement of howl-anderson 
						
					 
					
						2018-03-23 11:36:38 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							044397e269 
							
						 
					 
					
						
						
							
							Support .gz and .tar.gz files in spacy init-model  
						
						
						
					 
					
						2018-03-21 14:33:23 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							bede11b67c 
							
						 
					 
					
						
						
							
							Improve label management in parser and NER ( #2108 )  
						
						... 
						
						
						
						This patch does a few smallish things that tighten up the training workflow a little, and allow memory use during training to be reduced by letting the GoldCorpus stream data properly.
Previously, the parser and entity recognizer read and saved labels as lists, with extra labels noted separately. Lists were used because ordering is very important, to ensure that the label-to-class mapping is stable.
We now manage labels as nested dictionaries, first keyed by the action, and then keyed by the label. Values are frequencies. The trick is, how do we save new labels? We need to make sure we iterate over these in the same order they're added. Otherwise, we'll get different class IDs, and the model's predictions won't make sense.
To allow stable sorting, we map the new labels to negative values. If we have two new labels, they'll be noted as having "frequency" -1 and -2. The next new label will then have "frequency" -3. When we sort by (frequency, label), we then get a stable sort.
Storing frequencies then allows us to make the next nice improvement. Previously we had to iterate over the whole training set, to pre-process it for the deprojectivisation. This led to storing the whole training set in memory. This was most of the required memory during training.
To prevent this, we now store the frequencies as we stream in the data, and deprojectivize as we go. Once we've built the frequencies, we can then apply a frequency cut-off when we decide how many classes to make.
Finally, to allow proper data streaming, we also have to have some way of shuffling the iterator. This is awkward if the training files have multiple documents in them. To solve this, the GoldCorpus class now writes the training data to disk in msgpack files, one per document. We can then shuffle the data by shuffling the paths.
This is a squash merge, as I made a lot of very small commits. Individual commit messages below.
* Simplify label management for TransitionSystem and its subclasses
* Fix serialization for new label handling format in parser
* Simplify and improve GoldCorpus class. Reduce memory use, write to temp dir
* Set actions in transition system
* Require thinc 6.11.1.dev4
* Fix error in parser init
* Add unicode declaration
* Fix unicode declaration
* Update textcat test
* Try to get model training on less memory
* Print json loc for now
* Try rapidjson to reduce memory use
* Remove rapidjson requirement
* Try rapidjson for reduced mem usage
* Handle None heads when projectivising
* Stream json docs
* Fix train script
* Handle projectivity in GoldParse
* Fix projectivity handling
* Add minibatch_by_words util from ud_train
* Minibatch by number of words in spacy.cli.train
* Move minibatch_by_words util to spacy.util
* Fix label handling
* More hacking at label management in parser
* Fix encoding in msgpack serialization in GoldParse
* Adjust batch sizes in parser training
* Fix minibatch_by_words
* Add merge_subtokens function to pipeline.pyx
* Register merge_subtokens factory
* Restore use of msgpack tmp directory
* Use minibatch-by-words in train
* Handle retokenization in scorer
* Change back-off approach for missing labels. Use 'dep' label
* Update NER for new label management
* Set NER tags for over-segmented words
* Fix label alignment in gold
* Fix label back-off for infrequent labels
* Fix int type in labels dict key
* Fix int type in labels dict key
* Update feature definition for 8 feature set
* Update ud-train script for new label stuff
* Fix json streamer
* Print the line number if conll eval fails
* Update children and sentence boundaries after deprojectivisation
* Export set_children_from_heads from doc.pxd
* Render parses during UD training
* Remove print statement
* Require thinc 6.11.1.dev6. Try adding wheel as install_requires
* Set different dev version, to flush pip cache
* Update thinc version
* Update GoldCorpus docs
* Remove print statements
* Fix formatting and links [ci skip] 
						
					 
					
						2018-03-19 02:58:08 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							d7ce6527fb 
							
						 
					 
					
						
						
							
							Use increasing batch sizes in ud-train  
						
						
						
					 
					
						2018-03-14 20:15:28 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5dddb30e5b 
							
						 
					 
					
						
						
							
							Fix ud-train script  
						
						
						
					 
					
						2018-03-11 01:26:45 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2cab4d6517 
							
						 
					 
					
						
						
							
							Remove use of attr module in ud_train  
						
						
						
					 
					
						2018-03-11 00:59:39 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							754ea1b2f7 
							
						 
					 
					
						
						
							
							Link in spaCy CoNLL commands  
						
						
						
					 
					
						2018-03-10 23:42:15 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3478ea76d1 
							
						 
					 
					
						
						
							
							Add ud_train and ud_evaluate CLI commands  
						
						
						
					 
					
						2018-03-10 23:41:55 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b59765ca9f 
							
						 
					 
					
						
						
							
							Stream gold during spacy train  
						
						
						
					 
					
						2018-03-10 22:32:45 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							86405e4ad1 
							
						 
					 
					
						
						
							
							Fix CLI for multitask objectives  
						
						
						
					 
					
						2018-02-18 10:59:11 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a34749b2bf 
							
						 
					 
					
						
						
							
							Add multitask objectives options to train CLI  
						
						
						
					 
					
						2018-02-17 22:03:54 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							262d0a3148 
							
						 
					 
					
						
						
							
							Fix overwriting of lexical attributes when loading vectors during training  
						
						
						
					 
					
						2018-02-17 18:11:11 +01:00 
						 
				 
			
				
					
						
							
							
								Johannes Dollinger 
							
						 
					 
					
						
						
						
						
							
						
						
							bf94c13382 
							
						 
					 
					
						
						
							
							Don't fix random seeds on import  
						
						
						
					 
					
						2018-02-13 12:42:23 +01:00 
						 
				 
			
				
					
						
							
							
								Ali Zarezade 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							9df9da34a3 
							
						 
					 
					
						
						
							
							Fix init_model issue  
						
						... 
						
						
						
						Fixing issue #1928  
						
					 
					
						2018-02-03 17:21:34 +03:30 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							3c1fb9d02d 
							
						 
					 
					
						
						
							
							Make validate command fail more gracefully if version not found  
						
						... 
						
						
						
						Mostly relevant during development when working with .dev versions 
						
					 
					
						2018-01-31 22:06:28 +01:00 
						 
				 
			
				
					
						
							
							
								Adam Binford 
							
						 
					 
					
						
						
						
						
							
						
						
							1a2c2f7d7f 
							
						 
					 
					
						
						
							
							Fixed auto linking after download and added simple test to check  
						
						
						
					 
					
						2018-01-29 14:25:21 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							7ca49c2061 
							
						 
					 
					
						
						
							
							Merge branch 'master' into feature-improve-model-download  
						
						
						
					 
					
						2018-01-10 18:21:55 +01:00 
						 
				 
			
				
					
						
							
							
								Søren Lind Kristiansen 
							
						 
					 
					
						
						
						
						
							
						
						
							10dab8eef8 
							
						 
					 
					
						
						
							
							Remove dummy variable from function calls  
						
						
						
					 
					
						2018-01-05 09:37:05 +01:00 
						 
				 
			
				
					
						
							
							
								Søren Lind Kristiansen 
							
						 
					 
					
						
						
						
						
							
						
						
							7f0ab145e9 
							
						 
					 
					
						
						
							
							Don't pass CLI command name as dummy argument  
						
						
						
					 
					
						2018-01-04 21:33:47 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							2c656f90fb 
							
						 
					 
					
						
						
							
							Exit with 1 if incompatible models found (see  #1714 )  
						
						
						
					 
					
						2018-01-03 21:20:35 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							dacfaa2ca4 
							
						 
					 
					
						
						
							
							Ensure that download command exits properly ( resolves   #1714 )  
						
						
						
					 
					
						2018-01-03 21:03:36 +01:00 
						 
				 
			
				
					
						
							
							
								Søren Lind Kristiansen 
							
						 
					 
					
						
						
						
						
							
						
						
							a9ff6eadc9 
							
						 
					 
					
						
						
							
							Prefix dummy argument names with underscore  
						
						
						
					 
					
						2018-01-03 20:48:12 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							1081e08efb 
							
						 
					 
					
						
						
							
							Fix formatting  
						
						
						
					 
					
						2018-01-03 20:14:50 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							d8109964d6 
							
						 
					 
					
						
						
							
							Use --no-deps on model install  
						
						... 
						
						
						
						In general, it's nice for models to specify spaCy as a dependency. However, this tends to cause problems in conda environments, as pip will re-install spaCy and its dependencies (especially Thinc) 
						
					 
					
						2018-01-03 17:40:37 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							319d754309 
							
						 
					 
					
						
						
							
							Fix overwriting of existing symlinks  
						
						... 
						
						
						
						Check for is_symlink() to also overwrite invalid and outdated symlinks. Also show better error message if link path exists but is not symlink (i.e. file or directory). 
						
					 
					
						2018-01-03 17:39:36 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							8ba0dfd017 
							
						 
					 
					
						
						
							
							Make message on failed linking more clear  
						
						
						
					 
					
						2018-01-03 17:38:09 +01:00 
						 
				 
			
				
					
						
							
							
								Søren Lind Kristiansen 
							
						 
					 
					
						
						
						
						
							
						
						
							d6327e8495 
							
						 
					 
					
						
						
							
							Fix handling case when vectors not specified  
						
						
						
					 
					
						2018-01-03 12:20:49 +01:00 
						 
				 
			
				
					
						
							
							
								Søren Lind Kristiansen 
							
						 
					 
					
						
						
						
						
							
						
						
							bcc51d7d8b 
							
						 
					 
					
						
						
							
							Fix shifted positional arguments  
						
						
						
					 
					
						2018-01-03 12:19:47 +01:00 
						 
				 
			
				
					
						
							
							
								Søren Lind Kristiansen 
							
						 
					 
					
						
						
						
						
							
						
						
							5a9d377580 
							
						 
					 
					
						
						
							
							Remove abbreviation for positional plac argument  
						
						
						
					 
					
						2017-12-11 11:08:29 +01:00 
						 
				 
			
				
					
						
							
							
								Isaac Sijaranamual 
							
						 
					 
					
						
						
						
						
							
						
						
							20ae0c459a 
							
						 
					 
					
						
						
							
							Fixes "Error saving model"  #1622  
						
						
						
					 
					
						2017-12-10 23:07:13 +01:00 
						 
				 
			
				
					
						
							
							
								Isaac Sijaranamual 
							
						 
					 
					
						
						
						
						
							
						
						
							e188b61960 
							
						 
					 
					
						
						
							
							Make cli/train.py not eat exception  
						
						
						
					 
					
						2017-12-10 22:53:08 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							5eaa61c2b8 
							
						 
					 
					
						
						
							
							Fix formatting  
						
						
						
					 
					
						2017-12-07 10:23:09 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							24e80c51b8 
							
						 
					 
					
						
						
							
							Document init-model command  
						
						
						
					 
					
						2017-12-07 10:14:37 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c91f451b0f 
							
						 
					 
					
						
						
							
							Fix imports and CLI in init-model  
						
						
						
					 
					
						2017-12-07 10:03:07 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							82e80ff928 
							
						 
					 
					
						
						
							
							Rename model command to init_model and fix formatting  
						
						
						
					 
					
						2017-12-07 09:59:23 +01:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							2feeb428d6 
							
						 
					 
					
						
						
							
							Merge pull request  #1646  from GreenRiverRUS/master  
						
						... 
						
						
						
						Added model command to create models from raw data 
						
					 
					
						2017-12-07 08:54:26 +00:00 
						 
				 
			
				
					
						
							
							
								Thomas Werkmeister 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							94eac75b7c 
							
						 
					 
					
						
						
							
							fix setup.py spacy req string for packaging  
						
						... 
						
						
						
						Requirement should be `spacy>=2.0.2` instead of `spacy2.0.2` 
						
					 
					
						2017-12-03 04:16:28 -06:00 
						 
				 
			
				
					
						
							
							
								Vadim Mazaev 
							
						 
					 
					
						
						
						
						
							
						
						
							495eacf470 
							
						 
					 
					
						
						
							
							Merge branch 'model_command'  
						
						
						
					 
					
						2017-11-30 12:30:26 +03:00 
						 
				 
			
				
					
						
							
							
								Vadim Mazaev 
							
						 
					 
					
						
						
						
						
							
						
						
							c332ffdde1 
							
						 
					 
					
						
						
							
							Added model command to create model from raw data:  
						
						... 
						
						
						
						words counts, brown clusters and vectors 
						
					 
					
						2017-11-27 01:21:47 +03:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2acc907d55 
							
						 
					 
					
						
						
							
							Improve profiling  
						
						
						
					 
					
						2017-11-23 12:33:03 +00:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8d692771f6 
							
						 
					 
					
						
						
							
							Improve profiling  
						
						
						
					 
					
						2017-11-15 13:51:25 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							4c5d2c80d5 
							
						 
					 
					
						
						
							
							Re-add python -m to commands, too brittle :( (see  #1536 )  
						
						
						
					 
					
						2017-11-10 02:30:55 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							de45702bbe 
							
						 
					 
					
						
						
							
							Strip dev suffixes from version for compatibility check  
						
						
						
					 
					
						2017-11-08 18:40:21 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a2f980de4e 
							
						 
					 
					
						
						
							
							Exclude .devN versioning from compatibility check  
						
						
						
					 
					
						2017-11-08 18:03:52 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							a4662a31a9 
							
						 
					 
					
						
						
							
							Move model package templates to cli.package and update docs  
						
						
						
					 
					
						2017-11-07 12:15:35 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c2bbf076a4 
							
						 
					 
					
						
						
							
							Add document length cap for training  
						
						
						
					 
					
						2017-11-03 01:54:54 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							eca41f0cf6 
							
						 
					 
					
						
						
							
							Fix filename conversion for conllu  
						
						
						
					 
					
						2017-11-01 21:26:49 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e237472cdc 
							
						 
					 
					
						
						
							
							Fix tag and filename conversion for conllu  
						
						
						
					 
					
						2017-11-01 21:25:33 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							affd3404ab 
							
						 
					 
					
						
						
							
							Remove old model command (now "vocab")  
						
						
						
					 
					
						2017-11-01 13:14:03 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							37e62ab0e2 
							
						 
					 
					
						
						
							
							Update vector meta in meta.json  
						
						
						
					 
					
						2017-11-01 01:25:09 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c390f2d745 
							
						 
					 
					
						
						
							
							Make it easier to pass explicit no-pruning to vocab  
						
						
						
					 
					
						2017-10-31 20:14:47 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3659a807b0 
							
						 
					 
					
						
						
							
							Remove vector pruning arg from train CLI  
						
						
						
					 
					
						2017-10-31 19:21:05 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							59203a2e8a 
							
						 
					 
					
						
						
							
							Move vector pruning command into spacy vocab cli tool  
						
						
						
					 
					
						2017-10-31 19:10:01 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							803e41bc66 
							
						 
					 
					
						
						
							
							Merge branch 'develop' of  https://github.com/explosion/spaCy  into develop  
						
						
						
					 
					
						2017-10-30 18:39:51 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							abf8aa05d3 
							
						 
					 
					
						
						
							
							Populate --create-meta defaults from file if available  
						
						... 
						
						
						
						If meta.json is found in directory and user chooses to overwrite it, show existing data as defaults. 
						
					 
					
						2017-10-30 18:39:38 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							ce98fa7934 
							
						 
					 
					
						
						
							
							Fix formatting  
						
						
						
					 
					
						2017-10-30 18:38:55 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							98c35d2585 
							
						 
					 
					
						
						
							
							Fix spacy vocab command  
						
						
						
					 
					
						2017-10-30 18:38:41 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e98451b5f7 
							
						 
					 
					
						
						
							
							Add -prune-vectors argument to spacy.cli.train  
						
						
						
					 
					
						2017-10-30 18:00:10 +01:00 
						 
				 
			
				
					
						
							
							
								Explosion Bot 
							
						 
					 
					
						
						
						
						
							
						
						
							05a1dd570e 
							
						 
					 
					
						
						
							
							Fix vocab script  
						
						
						
					 
					
						2017-10-30 16:19:22 +01:00 
						 
				 
			
				
					
						
							
							
								Explosion Bot 
							
						 
					 
					
						
						
						
						
							
						
						
							b46bdce8d2 
							
						 
					 
					
						
						
							
							Add missing import  
						
						
						
					 
					
						2017-10-30 16:18:10 +01:00 
						 
				 
			
				
					
						
							
							
								Explosion Bot 
							
						 
					 
					
						
						
						
						
							
						
						
							0fc1209421 
							
						 
					 
					
						
						
							
							Wire up new vocab command  
						
						
						
					 
					
						2017-10-30 16:14:50 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							64e4ff7c4b 
							
						 
					 
					
						
						
							
							Merge 'tidy-up' changes into branch. Resolve conflicts  
						
						
						
					 
					
						2017-10-28 13:16:06 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							d941fc3667 
							
						 
					 
					
						
						
							
							Tidy up CLI  
						
						
						
					 
					
						2017-10-27 14:38:39 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							531142a933 
							
						 
					 
					
						
						
							
							Merge remote-tracking branch 'origin/develop' into feature/better-parser  
						
						
						
					 
					
						2017-10-27 12:34:48 +00:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b9616419e1 
							
						 
					 
					
						
						
							
							Add try/except around bz2 import  
						
						
						
					 
					
						2017-10-27 01:18:05 +00:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							11e3f19764 
							
						 
					 
					
						
						
							
							Fix vectors data added after training (see  #1457 )  
						
						
						
					 
					
						2017-10-25 16:08:26 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							057954695b 
							
						 
					 
					
						
						
							
							Read pipeline and vector data off model in --generate-meta  
						
						
						
					 
					
						2017-10-25 16:03:26 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							273e638183 
							
						 
					 
					
						
						
							
							Add vector data to model meta after training (see  #1457 )  
						
						
						
					 
					
						2017-10-25 16:03:05 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							95f6174516 
							
						 
					 
					
						
						
							
							Remove tensorizer from model pipeline example in spacy package  
						
						
						
					 
					
						2017-10-24 16:00:56 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							24512420b1 
							
						 
					 
					
						
						
							
							Show error if data_path does not exist or is None (see  #1102 )  
						
						
						
					 
					
						2017-10-19 00:53:49 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							dc01acd821 
							
						 
					 
					
						
						
							
							Escape encoding in validate function  
						
						
						
					 
					
						2017-10-12 22:23:21 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							fff1028391 
							
						 
					 
					
						
						
							
							Add validate CLI command  
						
						
						
					 
					
						2017-10-12 20:05:06 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a955843684 
							
						 
					 
					
						
						
							
							Increase default number of epochs  
						
						
						
					 
					
						2017-10-12 13:13:01 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							acba2e1051 
							
						 
					 
					
						
						
							
							Fix metadata in training  
						
						
						
					 
					
						2017-10-11 08:55:52 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							74c2c6a58c 
							
						 
					 
					
						
						
							
							Add default name and lang to meta  
						
						
						
					 
					
						2017-10-11 08:49:12 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5156074df1 
							
						 
					 
					
						
						
							
							Make loading code more consistent in train command  
						
						
						
					 
					
						2017-10-10 12:51:20 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							97c9b5db8b 
							
						 
					 
					
						
						
							
							Patch spacy.train for new pipeline management  
						
						
						
					 
					
						2017-10-09 23:41:16 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a635240398 
							
						 
					 
					
						
						
							
							Add conll_ner2json converter  
						
						
						
					 
					
						2017-10-09 22:03:26 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							735d18654d 
							
						 
					 
					
						
						
							
							Add NER converter for CoNLL 2003 data  
						
						
						
					 
					
						2017-10-09 20:06:28 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							808d8740d6 
							
						 
					 
					
						
						
							
							Remove print statement  
						
						
						
					 
					
						2017-10-09 08:45:20 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							0f41b25f60 
							
						 
					 
					
						
						
							
							Add speed benchmarks to metadata  
						
						
						
					 
					
						2017-10-09 08:05:37 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							be4f0b6460 
							
						 
					 
					
						
						
							
							Update defaults  
						
						
						
					 
					
						2017-10-08 02:08:12 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9d66a915da 
							
						 
					 
					
						
						
							
							Update training defaults  
						
						
						
					 
					
						2017-10-07 21:02:38 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							09442d25ec 
							
						 
					 
					
						
						
							
							Merge remote-tracking branch 'origin/develop' into feature/parser-history-model  
						
						
						
					 
					
						2017-10-07 07:05:04 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f4c9a98166 
							
						 
					 
					
						
						
							
							Fix spacy evaluate command on non-GPU  
						
						
						
					 
					
						2017-10-06 13:17:47 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c6cd81f192 
							
						 
					 
					
						
						
							
							Wrap try/except around model saving  
						
						
						
					 
					
						2017-10-05 08:14:24 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5743b06e36 
							
						 
					 
					
						
						
							
							Wrap model saving in try/except  
						
						
						
					 
					
						2017-10-05 08:12:50 -05:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							73ac0aa0b5 
							
						 
					 
					
						
						
							
							Update spacy evaluate and add displaCy option  
						
						
						
					 
					
						2017-10-04 00:03:15 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f24c2e3a8a 
							
						 
					 
					
						
						
							
							Fix evaluate for non-GPU  
						
						
						
					 
					
						2017-10-03 22:47:31 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							1289187279 
							
						 
					 
					
						
						
							
							Fix circular import  
						
						
						
					 
					
						2017-10-03 09:33:21 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a44c4c3a5b 
							
						 
					 
					
						
						
							
							Add timer to evaluate  
						
						
						
					 
					
						2017-10-03 09:15:35 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8902df44de 
							
						 
					 
					
						
						
							
							Fix component disabling during training  
						
						
						
					 
					
						2017-10-02 21:07:23 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c617d288d8 
							
						 
					 
					
						
						
							
							Update pipeline component names in spaCy train  
						
						
						
					 
					
						2017-10-02 17:20:19 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f942903429 
							
						 
					 
					
						
						
							
							Improve sentence merging in iob2json  
						
						
						
					 
					
						2017-10-02 17:02:10 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							31681d20e0 
							
						 
					 
					
						
						
							
							Fix concatenation in iob2json converter  
						
						
						
					 
					
						2017-10-02 16:50:26 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4896ce3320 
							
						 
					 
					
						
						
							
							Remove misleading comment  
						
						
						
					 
					
						2017-10-02 00:09:14 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							94df115a81 
							
						 
					 
					
						
						
							
							Merge branch 'develop' of  https://github.com/explosion/spaCy  into develop  
						
						
						
					 
					
						2017-10-01 14:06:23 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							69c7c642c2 
							
						 
					 
					
						
						
							
							Add spacy evaluate  
						
						
						
					 
					
						2017-10-01 14:05:04 -05:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							fd1a9225d8 
							
						 
					 
					
						
						
							
							Handle conversion of pipeline components correctly  
						
						... 
						
						
						
						Allow both comma and comma + whitespace as separators 
						
					 
					
						2017-09-29 20:52:56 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ac8481a7b0 
							
						 
					 
					
						
						
							
							Print NER loss  
						
						
						
					 
					
						2017-09-28 08:05:31 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							542ebfa498 
							
						 
					 
					
						
						
							
							Improve defaults  
						
						
						
					 
					
						2017-09-27 18:54:37 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							dcb86bdc43 
							
						 
					 
					
						
						
							
							Default batch size to 32  
						
						
						
					 
					
						2017-09-27 11:48:19 -05:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							1ff62eaee7 
							
						 
					 
					
						
						
							
							Fix option shortcut to avoid conflict  
						
						
						
					 
					
						2017-09-26 17:59:34 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							7fdfb78141 
							
						 
					 
					
						
						
							
							Add version option to cli.train  
						
						
						
					 
					
						2017-09-26 17:34:52 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							698fc0d016 
							
						 
					 
					
						
						
							
							Remove merge artefact  
						
						
						
					 
					
						2017-09-26 08:31:37 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							defb68e94f 
							
						 
					 
					
						
						
							
							Update feature/noshare with recent develop changes  
						
						
						
					 
					
						2017-09-26 08:15:14 -05:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							edf7e4881d 
							
						 
					 
					
						
						
							
							Add meta.json option to cli.train and add relevant properties  
						
						... 
						
						
						
						Add accuracy scores to meta.json instead of accuracy.json and replace
all relevant properties like lang, pipeline, spacy_version in existing
meta.json. If not present, also add name and version placeholders to
make it packagable. 
						
					 
					
						2017-09-25 19:00:47 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							204b58c864 
							
						 
					 
					
						
						
							
							Fix evaluation during training  
						
						
						
					 
					
						2017-09-24 05:01:03 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							dc3a623d00 
							
						 
					 
					
						
						
							
							Remove unused update_shared argument  
						
						
						
					 
					
						2017-09-24 05:00:37 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4348c479fc 
							
						 
					 
					
						
						
							
							Merge pre-trained vectors and noshare patches  
						
						
						
					 
					
						2017-09-22 20:07:28 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e93d43a43a 
							
						 
					 
					
						
						
							
							Fix training with preset vectors  
						
						
						
					 
					
						2017-09-22 20:00:40 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a2357cce3f 
							
						 
					 
					
						
						
							
							Set random seed in train script  
						
						
						
					 
					
						2017-09-23 02:57:31 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							0a9016cade 
							
						 
					 
					
						
						
							
							Fix serialization during training  
						
						
						
					 
					
						2017-09-21 13:06:45 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							20193371f5 
							
						 
					 
					
						
						
							
							Don't share CNN, to reduce complexities  
						
						
						
					 
					
						2017-09-21 14:59:48 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							1d73dec8b1 
							
						 
					 
					
						
						
							
							Refactor train script  
						
						
						
					 
					
						2017-09-20 19:17:10 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a0c4b33d03 
							
						 
					 
					
						
						
							
							Support resuming a model during spacy train  
						
						
						
					 
					
						2017-09-18 18:04:47 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8496d76224 
							
						 
					 
					
						
						
							
							Merge branch 'develop' of  https://github.com/explosion/spaCy  into develop  
						
						
						
					 
					
						2017-09-14 09:21:20 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							24ff6b0ad9 
							
						 
					 
					
						
						
							
							Fix parsing and tok2vec models  
						
						
						
					 
					
						2017-09-06 05:50:58 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e920885676 
							
						 
					 
					
						
						
							
							Fix pickle during train  
						
						
						
					 
					
						2017-09-02 12:46:01 -05:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							7e04b7f89c 
							
						 
					 
					
						
						
							
							Fix info text on pipeline in package cli  
						
						
						
					 
					
						2017-08-26 18:30:59 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							876f38c548 
							
						 
					 
					
						
						
							
							Merge pull request  #1279  from oroszgy/model_cli_v2  
						
						... 
						
						
						
						Added vector loading to model cli 
						
					 
					
						2017-08-26 15:57:50 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							bb1abbeba5 
							
						 
					 
					
						
						
							
							Only link model if download was successfull  
						
						
						
					 
					
						2017-08-23 12:36:31 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							7be5f30f17 
							
						 
					 
					
						
						
							
							Add profile function  
						
						
						
					 
					
						2017-08-21 23:22:49 +02:00 
						 
				 
			
				
					
						
							
							
								Gyorgy Orosz 
							
						 
					 
					
						
						
						
						
							
						
						
							b3576bfc86 
							
						 
					 
					
						
						
							
							Added vector leading to model cli  
						
						
						
					 
					
						2017-08-20 23:16:12 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							7a6edeea68 
							
						 
					 
					
						
						
							
							Merge branch 'develop' of  https://github.com/explosion/spaCy  into develop  
						
						
						
					 
					
						2017-08-20 12:55:39 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f2f9229964 
							
						 
					 
					
						
						
							
							Fix name of update_shared flag  
						
						
						
					 
					
						2017-08-20 18:19:06 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							80a5146ec2 
							
						 
					 
					
						
						
							
							Merge branch 'develop' of  https://github.com/explosion/spaCy  into develop  
						
						
						
					 
					
						2017-08-20 11:07:08 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							84bb543e4d 
							
						 
					 
					
						
						
							
							Add gold_preproc flag to cli/train  
						
						
						
					 
					
						2017-08-20 11:07:00 -05:00 
						 
				 
			
				
					
						
							
							
								Gyorgy Orosz 
							
						 
					 
					
						
						
						
						
							
						
						
							e5344b83a3 
							
						 
					 
					
						
						
							
							Ported model cli from v1  
						
						
						
					 
					
						2017-08-19 21:45:23 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							11c31d285c 
							
						 
					 
					
						
						
							
							Restore changes from nn-beam-parser  
						
						
						
					 
					
						2017-08-18 22:26:12 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							52c180ecf5 
							
						 
					 
					
						
						
							
							Revert "Merge branch 'develop' of  https://github.com/explosion/spaCy  into develop"  
						
						... 
						
						
						
						This reverts commit ea8de11ad508e443e083 
						
					 
					
						2017-08-14 13:00:23 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4ae0d5e1e6 
							
						 
					 
					
						
						
							
							Set defaults for convert command  
						
						
						
					 
					
						2017-08-13 09:03:38 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							d4f2baf7dd 
							
						 
					 
					
						
						
							
							Add create_meta option to package command  
						
						... 
						
						
						
						Re-create meta.json in model directory, even if it exists. Especially
useful when updating existing spaCy models or training with Prodigy.
Ensures user won't end up with multiple "en_core_web_sm" models, and
offers easy way to change the model's name and settings without having
to edit the meta.json file. 
						
					 
					
						2017-08-12 21:44:18 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8870d491f1 
							
						 
					 
					
						
						
							
							Remove redundant pickling during training  
						
						
						
					 
					
						2017-08-12 08:55:53 -05:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							28e2fec23b 
							
						 
					 
					
						
						
							
							Fix autolinking failure on fresh model install ( resolves   #1138 )  
						
						... 
						
						
						
						On fresh install via subprocess, pip.get_installed_distributions()
won't show new model, so is_package check in link command fails.
Solution for now is to get model package path explicitly and pass it to
link command. 
						
					 
					
						2017-08-09 11:52:38 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							0a566dc320 
							
						 
					 
					
						
						
							
							Add update_tensors flag to Language.update. Experimental, re  #1182  
						
						
						
					 
					
						2017-08-06 02:18:12 +02:00 
						 
				 
			
				
					
						
							
							
								György Orosz 
							
						 
					 
					
						
						
						
						
							
						
						
							62dbf9025c 
							
						 
					 
					
						
						
							
							Fixed conllu converter  
						
						
						
					 
					
						2017-06-09 22:53:56 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							03db56f48c 
							
						 
					 
					
						
						
							
							Detect spaCy version and add package title  
						
						... 
						
						
						
						Package title allows customised package names (like spacy-nightly) 
						
					 
					
						2017-06-05 20:11:02 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c52fde40f4 
							
						 
					 
					
						
						
							
							Improve train CLI  
						
						
						
					 
					
						2017-06-04 20:18:37 -05:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							848e47669e 
							
						 
					 
					
						
						
							
							Fix typo  
						
						
						
					 
					
						2017-06-04 20:44:15 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							7b7d46b64e 
							
						 
					 
					
						
						
							
							Fix typo and success message  
						
						
						
					 
					
						2017-06-04 13:45:50 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							21eef90dbc 
							
						 
					 
					
						
						
							
							Support specifying which GPU  
						
						
						
					 
					
						2017-06-03 16:10:23 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							43353b5413 
							
						 
					 
					
						
						
							
							Improve train  CLI script  
						
						
						
					 
					
						2017-06-03 13:28:20 -05:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							e5ae6ccf4e 
							
						 
					 
					
						
						
							
							Fix typo  
						
						
						
					 
					
						2017-06-01 16:46:15 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8a693c2605 
							
						 
					 
					
						
						
							
							Write binary file during training  
						
						
						
					 
					
						2017-05-31 02:59:18 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							9e83a17e95 
							
						 
					 
					
						
						
							
							Use new model templates  
						
						
						
					 
					
						2017-05-29 15:27:24 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8a24c60c1e 
							
						 
					 
					
						
						
							
							Merge branch 'develop' of  https://github.com/explosion/spaCy  into develop  
						
						
						
					 
					
						2017-05-28 08:12:05 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5cf47b847b 
							
						 
					 
					
						
						
							
							Handle iob with no tag in converter  
						
						
						
					 
					
						2017-05-28 08:11:39 -05:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							c1983621fb 
							
						 
					 
					
						
						
							
							Update util functions for model loading  
						
						
						
					 
					
						2017-05-28 00:22:40 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							49235017bf 
							
						 
					 
					
						
						
							
							Merge branch 'develop' of  https://github.com/explosion/spaCy  into develop  
						
						
						
					 
					
						2017-05-27 16:34:28 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5e4312feed 
							
						 
					 
					
						
						
							
							Evaluate loaded class, to ensure save/load works  
						
						
						
					 
					
						2017-05-27 15:47:02 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							7cc9c3e9a6 
							
						 
					 
					
						
						
							
							Fix convert CLI  
						
						
						
					 
					
						2017-05-27 15:44:42 -05:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							1203959625 
							
						 
					 
					
						
						
							
							Add pipeline setting to meta.json generator  
						
						
						
					 
					
						2017-05-27 20:02:01 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							086a06e7d7 
							
						 
					 
					
						
						
							
							Fix CLI docstrings and add command as first argument  
						
						... 
						
						
						
						Workaround for Plac 
						
					 
					
						2017-05-27 20:01:46 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							dc07d72d80 
							
						 
					 
					
						
						
							
							Merge branch 'develop' of  https://github.com/explosion/spaCy  into develop  
						
						
						
					 
					
						2017-05-27 08:20:40 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							de13fe0305 
							
						 
					 
					
						
						
							
							Remove length cap on sentences  
						
						
						
					 
					
						2017-05-27 08:20:32 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							d06f235fc9 
							
						 
					 
					
						
						
							
							Fix conflict on convert.py  
						
						
						
					 
					
						2017-05-26 11:33:29 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2b3b937a04 
							
						 
					 
					
						
						
							
							Fix converter CLI  
						
						
						
					 
					
						2017-05-26 11:32:41 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5a87bcf35f 
							
						 
					 
					
						
						
							
							Fix converters  
						
						
						
					 
					
						2017-05-26 11:32:34 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							d65f99a720 
							
						 
					 
					
						
						
							
							Improve model saving in train script  
						
						
						
					 
					
						2017-05-26 05:52:09 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							22d7b448a5 
							
						 
					 
					
						
						
							
							Fix convert command  
						
						
						
					 
					
						2017-05-25 19:47:12 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							df8015f05d 
							
						 
					 
					
						
						
							
							Tweaks to train script  
						
						
						
					 
					
						2017-05-25 17:15:24 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							702fe74a4d 
							
						 
					 
					
						
						
							
							Clean up spacy.cli.train  
						
						
						
					 
					
						2017-05-25 16:16:30 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							135a13790c 
							
						 
					 
					
						
						
							
							Disable gold preprocessing  
						
						
						
					 
					
						2017-05-24 20:10:20 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3959d778ac 
							
						 
					 
					
						
						
							
							Revert "Revert "WIP on improving parser efficiency""  
						
						... 
						
						
						
						This reverts commit 532afef4a8 
						
					 
					
						2017-05-23 03:06:53 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							532afef4a8 
							
						 
					 
					
						
						
							
							Revert "WIP on improving parser efficiency"  
						
						... 
						
						
						
						This reverts commit bdaac7ab44 
						
					 
					
						2017-05-23 03:05:25 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							bdaac7ab44 
							
						 
					 
					
						
						
							
							WIP on improving parser efficiency  
						
						
						
					 
					
						2017-05-23 02:59:31 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6e8dce2c05 
							
						 
					 
					
						
						
							
							Fix train command line args  
						
						
						
					 
					
						2017-05-22 10:41:39 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ae8cf70dc1 
							
						 
					 
					
						
						
							
							Fix CLI train signature  
						
						
						
					 
					
						2017-05-22 06:13:39 -05:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							fc3ec733ea 
							
						 
					 
					
						
						
							
							Reduce complexity in CLI  
						
						... 
						
						
						
						Remove now redundant model command and move plac annotations to cli
files 
						
					 
					
						2017-05-22 12:28:58 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							bc2294d7f1 
							
						 
					 
					
						
						
							
							Add support for fiddly hyper-parameters to train func  
						
						
						
					 
					
						2017-05-22 04:51:08 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4e0988605a 
							
						 
					 
					
						
						
							
							Pass through non-projective=True  
						
						
						
					 
					
						2017-05-22 04:51:08 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e14533757b 
							
						 
					 
					
						
						
							
							Use averaged params for evaluation  
						
						
						
					 
					
						2017-05-22 04:51:08 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5db89053aa 
							
						 
					 
					
						
						
							
							Merge docstrings  
						
						
						
					 
					
						2017-05-21 13:46:23 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							baf3ef0ddc 
							
						 
					 
					
						
						
							
							Remove import of removed train_config script  
						
						
						
					 
					
						2017-05-21 09:07:34 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4c9202249d 
							
						 
					 
					
						
						
							
							Refactor training, to fix memory leak  
						
						
						
					 
					
						2017-05-21 09:07:06 -05:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							0c6c65aa3c 
							
						 
					 
					
						
						
							
							Improve messaging if model linking fails after download  
						
						
						
					 
					
						2017-05-21 00:28:37 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							e39ad78267 
							
						 
					 
					
						
						
							
							Resolve model name properly in cli.info  
						
						... 
						
						
						
						Use util.resolve_model_path() to also allow package names and paths. 
						
					 
					
						2017-05-20 12:24:40 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3376d4d6e8 
							
						 
					 
					
						
						
							
							Update the train script, fixing GPU memory leak  
						
						
						
					 
					
						2017-05-19 18:15:50 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							08766240c3 
							
						 
					 
					
						
						
							
							Add incomplete iob converter  
						
						
						
					 
					
						2017-05-19 13:27:51 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							09a877886b 
							
						 
					 
					
						
						
							
							WIP on iob converter  
						
						
						
					 
					
						2017-05-19 13:24:39 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ca70b08661 
							
						 
					 
					
						
						
							
							Fix GPU training and evaluation  
						
						
						
					 
					
						2017-05-18 08:30:33 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							fc8d3a112c 
							
						 
					 
					
						
						
							
							Add util.env_opt support: Can set hyper params through environment variables.  
						
						
						
					 
					
						2017-05-18 04:36:53 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							55dab77de8 
							
						 
					 
					
						
						
							
							Add conversion rule for .conll  
						
						
						
					 
					
						2017-05-17 13:13:48 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							793430aa7a 
							
						 
					 
					
						
						
							
							Get spaCy train command working with neural network  
						
						... 
						
						
						
						* Integrate models into pipeline
* Add basic serialization (maybe incorrect)
* Fix pickle on vocab 
						
					 
					
						2017-05-17 12:04:50 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3bf4a28d8d 
							
						 
					 
					
						
						
							
							Use tag in CoNLL converter, not POS  
						
						
						
					 
					
						2017-05-17 12:04:33 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8cf097ca88 
							
						 
					 
					
						
						
							
							Redesign training to integrate NN components  
						
						... 
						
						
						
						* Obsolete .parser, .entity etc names in favour of .pipeline
* Components no longer create models on initialization
* Models created by loading method (from_disk(), from_bytes() etc), or
    .begin_training()
* Add .predict(), .set_annotations() methods in components
* Pass state through pipeline, to allow components to share information
    more flexibly. 
						
					 
					
						2017-05-16 16:17:30 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5211645af3 
							
						 
					 
					
						
						
							
							Get data flowing through pipeline. Needs redesign  
						
						
						
					 
					
						2017-05-16 11:21:59 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a9edb3aa1d 
							
						 
					 
					
						
						
							
							Improve integration of NN parser, to support unified training API  
						
						
						
					 
					
						2017-05-15 21:53:27 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							9d85cda8e4 
							
						 
					 
					
						
						
							
							Fix models error message and use about.__docs_models__ (see  #1051 )  
						
						
						
					 
					
						2017-05-13 13:05:47 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							4eefb288e3 
							
						 
					 
					
						
						
							
							Port over PR  #1055  
						
						
						
					 
					
						2017-05-13 03:25:32 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							95edd9e896 
							
						 
					 
					
						
						
							
							Let parse_package_meta take full path  
						
						
						
					 
					
						2017-05-08 15:30:48 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							59c3b9d4dd 
							
						 
					 
					
						
						
							
							Tidy up CLI and fix print functions  
						
						
						
					 
					
						2017-05-07 23:25:29 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							527d51ac9a 
							
						 
					 
					
						
						
							
							Fetch shortcuts from GitHub and improve error handling  
						
						
						
					 
					
						2017-04-26 18:00:28 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4f9657b42b 
							
						 
					 
					
						
						
							
							Fix reporting if no dev data with train  
						
						
						
					 
					
						2017-04-23 22:27:10 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							3a9710f356 
							
						 
					 
					
						
						
							
							Pass dev_scores to print_progress correctly ( resolves   #1008 )  
						
						... 
						
						
						
						Only read scores attribute if command is used with dev_data, otherwise
default dev_scores to empty dict. 
						
					 
					
						2017-04-23 15:58:40 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							25c70b4cc5 
							
						 
					 
					
						
						
							
							Move fix_text to spacy.compat (see  #1002 )  
						
						
						
					 
					
						2017-04-20 15:47:17 +02:00 
						 
				 
			
				
					
						
							
							
								Gyorgy Orosz 
							
						 
					 
					
						
						
						
						
							
						
						
							4a06a2572c 
							
						 
					 
					
						
						
							
							Using ftfy for handling broken encoded strings.  
						
						
						
					 
					
						2017-04-20 13:34:51 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							48da244058 
							
						 
					 
					
						
						
							
							Use spacy.compat.json_dumps for Python 2/3 compatibility ( resolves   #991 )  
						
						
						
					 
					
						2017-04-19 11:50:36 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							82f5f1f98f 
							
						 
					 
					
						
						
							
							Replace str with compat.unicode_  
						
						
						
					 
					
						2017-04-17 01:29:54 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							17c9fffb9e 
							
						 
					 
					
						
						
							
							Fix naked except  
						
						
						
					 
					
						2017-04-16 15:28:16 -05:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							6145b7c153 
							
						 
					 
					
						
						
							
							Remove redundant Path  
						
						
						
					 
					
						2017-04-16 20:53:25 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							89a4f262fc 
							
						 
					 
					
						
						
							
							Fix training methods  
						
						
						
					 
					
						2017-04-16 13:00:37 -05:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							8191e33cf1 
							
						 
					 
					
						
						
							
							Update link error message with info on permissions  
						
						
						
					 
					
						2017-04-16 13:32:31 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							a3ddbc0444 
							
						 
					 
					
						
						
							
							Add note about --force flag to error message  
						
						
						
					 
					
						2017-04-16 13:14:36 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							e3de035814 
							
						 
					 
					
						
						
							
							Add meta validation to check for required settings  
						
						... 
						
						
						
						Complain if no "lang", "name" or "version" is found (those settings are
used in directory / package names). Package will still build without,
but it'll inevitably fail somewhere down the line. 
						
					 
					
						2017-04-16 13:13:17 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							a7574b7572 
							
						 
					 
					
						
						
							
							Add more options to read in meta data in package command  
						
						... 
						
						
						
						Add meta option to supply path to meta.json. If no meta path is set,
check if meta.json exists in input directory and use it. Otherwise,
prompt for details on the command line. 
						
					 
					
						2017-04-16 13:06:02 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							13c8a42d2b 
							
						 
					 
					
						
						
							
							Fix typos  
						
						
						
					 
					
						2017-04-16 13:03:58 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							35fb4febe2 
							
						 
					 
					
						
						
							
							Fix whitespace  
						
						
						
					 
					
						2017-04-15 12:13:45 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							c05ec4b89a 
							
						 
					 
					
						
						
							
							Add compat functions and remove old workarounds  
						
						... 
						
						
						
						Add ensure_path util function to handle checking instance of path 
						
					 
					
						2017-04-15 12:11:16 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							d24589aa72 
							
						 
					 
					
						
						
							
							Clean up imports, unused code, whitespace, docstrings  
						
						
						
					 
					
						2017-04-15 12:05:47 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							561f2a3eb4 
							
						 
					 
					
						
						
							
							Use consistent formatting for docstrings  
						
						
						
					 
					
						2017-04-15 11:59:21 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							84341c2975 
							
						 
					 
					
						
						
							
							Only compile list of models if data_path exists  
						
						
						
					 
					
						2017-04-14 16:48:02 +02:00 
						 
				 
			
				
					
						
							
							
								Gyorgy Orosz 
							
						 
					 
					
						
						
						
						
							
						
						
							dd3244c08a 
							
						 
					 
					
						
						
							
							Made json dump to produce unicode strings in py2  
						
						
						
					 
					
						2017-04-13 23:30:47 +02:00 
						 
				 
			
				
					
						
							
							
								Gyorgy Orosz 
							
						 
					 
					
						
						
						
						
							
						
						
							a9469c8173 
							
						 
					 
					
						
						
							
							Fixed typo  
						
						
						
					 
					
						2017-04-13 15:24:14 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							41037f0f07 
							
						 
					 
					
						
						
							
							Remove unused imports  
						
						
						
					 
					
						2017-04-13 13:52:11 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							1b92c8d5d5 
							
						 
					 
					
						
						
							
							Use unicode paths on Windows/Python 2 and catch other errors ( resolves   #970 )  
						
						... 
						
						
						
						try/except here is quite dirty, but it'll at least make sure users see
an error message that explains what's going on 
						
					 
					
						2017-04-10 17:49:51 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							7ea1673072 
							
						 
					 
					
						
						
							
							Fix whitespace  
						
						
						
					 
					
						2017-04-07 13:28:48 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							255650dbc2 
							
						 
					 
					
						
						
							
							Add connlu2json converter from explosion/spacy-dev-resources/#11  
						
						
						
					 
					
						2017-04-07 13:05:12 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							789ce8a45e 
							
						 
					 
					
						
						
							
							Add convert command  
						
						
						
					 
					
						2017-04-07 13:04:17 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							9952d3b08a 
							
						 
					 
					
						
						
							
							Fix whitespace  
						
						
						
					 
					
						2017-04-07 13:02:05 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							dcf8ab0c47 
							
						 
					 
					
						
						
							
							Merge branch 'develop'  
						
						
						
					 
					
						2017-04-07 12:00:09 +02:00 
						 
				 
			
				
					
						
							
							
								Joshua Reeter 
							
						 
					 
					
						
						
						
						
							
						
						
							564daf6dec 
							
						 
					 
					
						
						
							
							Issue  #934  symlink should not convert paths as_posix under windows.  
						
						
						
					 
					
						2017-03-30 23:47:45 -05:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							4759fd437d 
							
						 
					 
					
						
						
							
							Merge branch 'master' into develop  
						
						
						
					 
					
						2017-03-29 10:37:13 +02:00 
						 
				 
			
				
					
						
							
							
								Grégory Howard 
							
						 
					 
					
						
						
						
						
							
						
						
							9c2996b27f 
							
						 
					 
					
						
						
							
							correction of package.py (encoding on open instead of write)  
						
						
						
					 
					
						2017-03-29 09:11:02 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							7198cf1c8a 
							
						 
					 
					
						
						
							
							Remove unused import  
						
						
						
					 
					
						2017-03-26 20:56:05 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							7ceaa1614b 
							
						 
					 
					
						
						
							
							Add experimental model init command  
						
						
						
					 
					
						2017-03-26 20:51:40 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2efdbc08ff 
							
						 
					 
					
						
						
							
							Make training work with directories  
						
						
						
					 
					
						2017-03-26 08:46:44 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9dcb58aaaf 
							
						 
					 
					
						
						
							
							Merge CLI changes  
						
						
						
					 
					
						2017-03-26 07:30:45 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6b7f7a2060 
							
						 
					 
					
						
						
							
							Connect parser L1 option to train CLI  
						
						
						
					 
					
						2017-03-26 07:24:07 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							dec5571bf3 
							
						 
					 
					
						
						
							
							Update train CLI  
						
						
						
					 
					
						2017-03-26 07:16:52 -05:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							53cf2f1c0e 
							
						 
					 
					
						
						
							
							Make dev data optional  
						
						
						
					 
					
						2017-03-26 11:48:17 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5eac089fbe 
							
						 
					 
					
						
						
							
							Merge branch 'master' into develop  
						
						
						
					 
					
						2017-03-26 04:45:43 -05:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							97814f8da6 
							
						 
					 
					
						
						
							
							Update Windows Python 2 link workaround to use helper functions  
						
						
						
					 
					
						2017-03-25 14:04:27 +01:00 
						 
				 
			
				
					
						
							
							
								Greg Baker 
							
						 
					 
					
						
						
						
						
							
						
						
							b7f714b498 
							
						 
					 
					
						
						
							
							Possible solution to  #909  
						
						
						
					 
					
						2017-03-25 21:36:38 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9c9cd99144 
							
						 
					 
					
						
						
							
							Merge branch 'master' of  https://github.com/explosion/spaCy  
						
						
						
					 
					
						2017-03-23 11:11:24 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							0035fd9efe 
							
						 
					 
					
						
						
							
							Add spacy train work in progress  
						
						
						
					 
					
						2017-03-23 11:08:41 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							c3a9f73896 
							
						 
					 
					
						
						
							
							Fix writing to file  
						
						
						
					 
					
						2017-03-21 12:35:22 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							d74aa428ad 
							
						 
					 
					
						
						
							
							Fix path  
						
						
						
					 
					
						2017-03-21 12:26:00 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							83a999ea83 
							
						 
					 
					
						
						
							
							Change default license from MIT to CC  
						
						
						
					 
					
						2017-03-21 12:24:43 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							ae46647560 
							
						 
					 
					
						
						
							
							Fix brackets  
						
						
						
					 
					
						2017-03-21 12:21:42 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							3e134b5b2b 
							
						 
					 
					
						
						
							
							Make sure paths in copytree and rmtree are strings  
						
						
						
					 
					
						2017-03-21 12:15:33 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							cf0094187e 
							
						 
					 
					
						
						
							
							Fetch MANIFEST.in from GitHub as well  
						
						
						
					 
					
						2017-03-21 11:32:38 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							3f4e3fda1d 
							
						 
					 
					
						
						
							
							Update command and fetch file templates from GitHub  
						
						... 
						
						
						
						While feature is still experimental, this allows files to be modified
without having to ship a new version of spaCy. 
						
					 
					
						2017-03-21 11:17:36 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							5230ed5b98 
							
						 
					 
					
						
						
							
							Move directory check and overwriting/creating dirs to own function  
						
						
						
					 
					
						2017-03-21 02:06:53 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							46bc3c36b0 
							
						 
					 
					
						
						
							
							Fix typo  
						
						
						
					 
					
						2017-03-21 02:06:37 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							64e38f304e 
							
						 
					 
					
						
						
							
							Only import shutil  
						
						
						
					 
					
						2017-03-21 02:06:29 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							448a916d0d 
							
						 
					 
					
						
						
							
							Add --force option to override directory  
						
						
						
					 
					
						2017-03-21 02:05:34 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							bf240132d7 
							
						 
					 
					
						
						
							
							Add cli.package command to build model packages  
						
						
						
					 
					
						2017-03-20 22:50:13 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							692eb0603d 
							
						 
					 
					
						
						
							
							Fix high memory usage in download command  
						
						... 
						
						
						
						Due to PyPi issue #2984 , installing large packages via pip causes
a large spike in memory usage. The recommended fix is to disable
caching. 
						
					 
					
						2017-03-20 18:24:44 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							b8f8d5d8bf 
							
						 
					 
					
						
						
							
							Make sure model_path is a Posix path  
						
						... 
						
						
						
						Otherwise, formatting the success message with model_path.as_posix()
fails when using a local path for linking (linking still works, but the
error message is confusing) 
						
					 
					
						2017-03-19 11:57:13 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							8de5108af6 
							
						 
					 
					
						
						
							
							Exclude common cache directories from mode list in cli.info  
						
						... 
						
						
						
						This means models called "cache" etc. won't show up in the list, but it
seems worth it. 
						
					 
					
						2017-03-19 01:44:43 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							797f286c38 
							
						 
					 
					
						
						
							
							Use import to find data package  
						
						
						
					 
					
						2017-03-19 01:39:36 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							bc10d06bc2 
							
						 
					 
					
						
						
							
							Merge branch 'master' of  https://github.com/explosion/spaCy  
						
						
						
					 
					
						2017-03-18 19:32:54 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							1754e0db9b 
							
						 
					 
					
						
						
							
							Call pip via subprocess, to make it use virtualenv  
						
						
						
					 
					
						2017-03-18 19:29:36 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							1277abcde2 
							
						 
					 
					
						
						
							
							Remove print statement  
						
						
						
					 
					
						2017-03-18 19:14:58 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							dcec104643 
							
						 
					 
					
						
						
							
							Remove unused import  
						
						
						
					 
					
						2017-03-18 18:57:45 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							703eb7bdbd 
							
						 
					 
					
						
						
							
							Fix link module  
						
						
						
					 
					
						2017-03-18 18:57:31 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							7d33104180 
							
						 
					 
					
						
						
							
							Use distutils.sysconfig.get_python_lib  
						
						... 
						
						
						
						site.getsitepackages seems to not work as expected in Python 2 
						
					 
					
						2017-03-18 18:20:40 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							0dd7710556 
							
						 
					 
					
						
						
							
							Make sure paths are paths  
						
						
						
					 
					
						2017-03-18 16:48:52 +01:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							ec3e810662 
							
						 
					 
					
						
						
							
							Add directory cli and set up command line interface  
						
						
						
					 
					
						2017-03-18 15:14:48 +01:00