Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							02838a1d47 
							
						 
					 
					
						
						
							
							Fix resolve_dot_names  
						
						
						
					 
					
						2020-09-28 15:27:10 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							822ea4ef61 
							
						 
					 
					
						
						
							
							Refactor CLI  
						
						
						
					 
					
						2020-09-28 15:09:59 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							a89e0ff7cb 
							
						 
					 
					
						
						
							
							Fix typo  
						
						
						
					 
					
						2020-09-28 12:55:21 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							a62337b3f3 
							
						 
					 
					
						
						
							
							Tidy up vocab init  
						
						
						
					 
					
						2020-09-28 12:53:06 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							c22ecc66bb 
							
						 
					 
					
						
						
							
							Don't support init path for now  
						
						
						
					 
					
						2020-09-28 12:46:28 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							f49288ab81 
							
						 
					 
					
						
						
							
							Update default_config_pretraining.cfg  
						
						
						
					 
					
						2020-09-28 12:31:54 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							a5f2cc0509 
							
						 
					 
					
						
						
							
							Tidy up and remove raw text (rehearsal) for now  
						
						
						
					 
					
						2020-09-28 12:30:13 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							1590de11b1 
							
						 
					 
					
						
						
							
							Update config  
						
						
						
					 
					
						2020-09-28 12:05:23 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9f6ad06452 
							
						 
					 
					
						
						
							
							Upd default config  
						
						
						
					 
					
						2020-09-28 12:00:23 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							e44a7519cd 
							
						 
					 
					
						
						
							
							Update CLI and add [initialize] block  
						
						
						
					 
					
						2020-09-28 11:56:14 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							d5155376fd 
							
						 
					 
					
						
						
							
							Update vocab init  
						
						
						
					 
					
						2020-09-28 11:30:18 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							8b74fd19df 
							
						 
					 
					
						
						
							
							init pipeline -> init nlp  
						
						
						
					 
					
						2020-09-28 11:13:38 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							2fdb7285a0 
							
						 
					 
					
						
						
							
							Update CLI  
						
						
						
					 
					
						2020-09-28 11:06:07 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							553bfea641 
							
						 
					 
					
						
						
							
							Fix commands  
						
						
						
					 
					
						2020-09-28 10:53:17 +02:00 
						 
				 
			
				
					
						
							
							
								Adriane Boyd 
							
						 
					 
					
						
						
						
						
							
						
						
							09d42d4bf0 
							
						 
					 
					
						
						
							
							Add pickle5 to Makefile  
						
						
						
					 
					
						2020-09-28 09:49:59 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							44bad1474c 
							
						 
					 
					
						
						
							
							Add init_pipeline file  
						
						
						
					 
					
						2020-09-28 09:47:34 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							65448b2e34 
							
						 
					 
					
						
						
							
							Remove schema=None until Optional  
						
						
						
					 
					
						2020-09-28 03:42:58 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b886f53c31 
							
						 
					 
					
						
						
							
							init-pipeline runs (maybe doesnt work)  
						
						
						
					 
					
						2020-09-28 03:42:47 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ed2aff2db3 
							
						 
					 
					
						
						
							
							Remove unused train code  
						
						
						
					 
					
						2020-09-28 03:12:31 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3a0a3b8db6 
							
						 
					 
					
						
						
							
							Dont hard-code for 'corpora' name  
						
						
						
					 
					
						2020-09-28 03:06:33 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a023cf3ecc 
							
						 
					 
					
						
						
							
							Add (untested) resolve_dot_names util  
						
						
						
					 
					
						2020-09-28 03:06:12 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							a976da168c 
							
						 
					 
					
						
						
							
							Support data augmentation in Corpus (#6155)
						
						... 
						
						
						
						* Support data augmentation in Corpus
* Note initial docs for data augmentation
* Add augmenter to quickstart
* Fix flake8
* Format
* Fix test
* Update spacy/tests/training/test_training.py
* Improve data augmentation arguments
* Update templates
* Move randomization out into caller
* Refactor
* Update spacy/training/augment.py
* Update spacy/tests/training/test_training.py
* Fix augment
* Fix test 
						
					 
					
						2020-09-28 03:03:27 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							13b1605ee6 
							
						 
					 
					
						
						
							
							Add init script  
						
						
						
					 
					
						2020-09-28 01:08:49 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a3e1791c9c 
							
						 
					 
					
						
						
							
							Upd train  
						
						
						
					 
					
						2020-09-28 01:08:30 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b5556093e2 
							
						 
					 
					
						
						
							
							Start updating train script  
						
						
						
					 
					
						2020-09-27 23:59:44 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							cad4dbddaa 
							
						 
					 
					
						
						
							
							Merge pull request #6156 from explosion/feature/new-thinc-config-resolution
						
						
						
					 
					
						2020-09-27 23:57:52 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							9016d23cc5 
							
						 
					 
					
						
						
							
							Fix exclude and add test  
						
						
						
					 
					
						2020-09-27 23:34:03 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							c0c842ae5b 
							
						 
					 
					
						
						
							
							Update Thinc version  
						
						
						
					 
					
						2020-09-27 23:24:40 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							658fad428a 
							
						 
					 
					
						
						
							
							Fix base schema integration  
						
						
						
					 
					
						2020-09-27 22:50:36 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							47c6a461e5 
							
						 
					 
					
						
						
							
							Revert except all in CLI error handling [ci skip]  
						
						
						
					 
					
						2020-09-27 22:41:00 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							5c53a76021 
							
						 
					 
					
						
						
							
							Improve CLI error handling [ci skip]  
						
						
						
					 
					
						2020-09-27 22:39:04 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							e04bd16f7f 
							
						 
					 
					
						
						
							
							Merge branch 'develop' into feature/new-thinc-config-resolution  
						
						
						
					 
					
						2020-09-27 22:34:46 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							d7ad65a9bb 
							
						 
					 
					
						
						
							
							Fix handling of error description [ci skip]  
						
						
						
					 
					
						2020-09-27 22:31:57 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							7e938ed63e 
							
						 
					 
					
						
						
							
							Update config resolution to use new Thinc  
						
						
						
					 
					
						2020-09-27 22:21:31 +02:00 
						 
				 
			
				
					
						
							
							
								Adriane Boyd 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							013b66de05 
							
						 
					 
					
						
						
							
							Add tokenizer scoring to ja / ko / zh (#6152)
						
						
						
					 
					
						2020-09-27 22:20:45 +02:00 
						 
				 
			
				
					
						
							
							
								Adriane Boyd 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							a6548ead17 
							
						 
					 
					
						
						
							
							Add _ as a symbol (#6153)
						
						... 
						
						
						
						* Add _ to StringStore in Morphology
* Add _ as a symbol
Add `_` as a symbol instead of adding to the `StringStore`. 
						
					 
					
						2020-09-27 22:20:14 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							39b178999c 
							
						 
					 
					
						
						
							
							Tmp notes  
						
						
						
					 
					
						2020-09-27 20:13:38 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							f29d5b9b89 
							
						 
					 
					
						
						
							
							Update docs [ci skip]  
						
						
						
					 
					
						2020-09-27 18:39:38 +02:00 
						 
				 
			
				
					
						
							
							
								Adriane Boyd 
							
						 
					 
					
						
						
						
						
							
						
						
							8393dbedad 
							
						 
					 
					
						
						
							
							Minor fixes  
						
						... 
						
						
						
						* Put `cfg` back in serialization
* Add `pickle5` to pytest conf 
						
					 
					
						2020-09-27 15:15:53 +02:00 
						 
				 
			
				
					
						
							
							
								Adriane Boyd 
							
						 
					 
					
						
						
						
						
							
						
						
							54fe871935 
							
						 
					 
					
						
						
							
							Fix formatting, refactor pickle5 exceptions  
						
						
						
					 
					
						2020-09-27 14:37:28 +02:00 
						 
				 
			
				
					
						
							
							
								Adriane Boyd 
							
						 
					 
					
						
						
						
						
							
						
						
							11e195d3ed 
							
						 
					 
					
						
						
							
							Update ChineseTokenizer  
						
						... 
						
						
						
						* Allow `pkuseg_model` to be set to `None` on initialization
* Don't save config within tokenizer
* Force convert pkuseg_model to use pickle protocol 4 by reencoding with
`pickle5` on serialization
* Update pkuseg serialization test 
						
					 
					
						2020-09-27 14:00:18 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							3838b14148 
							
						 
					 
					
						
						
							
							Merge pull request #6151 from explosion/fix/train-config-interpolation
						
						
						
					 
					
						2020-09-26 15:56:45 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							b4486d747d 
							
						 
					 
					
						
						
							
							Merge branch 'develop' into fix/train-config-interpolation  
						
						
						
					 
					
						2020-09-26 15:32:14 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							8fea06d55e 
							
						 
					 
					
						
						
							
							Merge pull request #6149 from adrianeboyd/feature/attributeruler-match-ids
						
						... 
						
						
						
						Simplify string match IDs for AttributeRuler 
						
					 
					
						2020-09-26 15:31:30 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							b78a60ef73 
							
						 
					 
					
						
						
							
							Merge pull request #6150 from explosion/feature/cli-config-validation
						
						... 
						
						
						
						Improve CLI config validation with latest Thinc 
						
					 
					
						2020-09-26 15:30:51 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							b2d07de786 
							
						 
					 
					
						
						
							
							Construct nlp from uninterpolated config before training  
						
						
						
					 
					
						2020-09-26 15:16:59 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							e06ff8b71d 
							
						 
					 
					
						
						
							
							Update docs [ci skip]  
						
						
						
					 
					
						2020-09-26 13:18:08 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							ca3c997062 
							
						 
					 
					
						
						
							
							Improve CLI config validation with latest Thinc  
						
						
						
					 
					
						2020-09-26 13:13:57 +02:00 
						 
				 
			
				
					
						
							
							
								Adriane Boyd 
							
						 
					 
					
						
						
						
						
							
						
						
							6c25e60089 
							
						 
					 
					
						
						
							
							Simplify string match IDs for AttributeRuler  
						
						
						
					 
					
						2020-09-26 11:12:39 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							702edf52a0 
							
						 
					 
					
						
						
							
							Fix attributeruler  
						
						
						
					 
					
						2020-09-26 00:30:48 +02:00