svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							40276fd3be 
							
						 
					 
					
						
						
							
							update NEL docs after latest refactor  
						
						
						
					 
					
						2020-10-12 11:41:27 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							08cb085f6c 
							
						 
					 
					
						
						
							
							Merge remote-tracking branch 'upstream/develop' into fix/various  
						
						
						
					 
					
						2020-10-09 17:01:27 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							040c7c0541 
							
						 
					 
					
						
						
							
							fix get_dim calls in build_simple_cnn_text_classifier  
						
						
						
					 
					
						2020-10-09 15:40:58 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							853edace37 
							
						 
					 
					
						
						
							
							fix MultiHashEmbed example in documentation  
						
						
						
					 
					
						2020-10-09 14:11:06 +02:00 
						 
				 
			
				
					
						
							
							
								Adriane Boyd 
							
						 
					 
					
						
						
						
						
							
						
						
							39aabf50ab 
							
						 
					 
					
						
						
							
							Also rename to include_static_vectors in CharEmbed  
						
						
						
					 
					
						2020-10-09 11:54:48 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							1a554bdcb1 
							
						 
					 
					
						
						
							
							Update docs and docstring [ci skip]  
						
						
						
					 
					
						2020-10-05 21:55:27 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							9614e53b02 
							
						 
					 
					
						
						
							
							Tidy up and auto-format  
						
						
						
					 
					
						2020-10-05 21:55:18 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e50047f1c5 
							
						 
					 
					
						
						
							
							Check lengths match  
						
						
						
					 
					
						2020-10-05 20:02:45 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							cdd2b79b6d 
							
						 
					 
					
						
						
							
							Remove deprecated MultiHashEmbed  
						
						
						
					 
					
						2020-10-05 19:58:18 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6dcc4a0ba6 
							
						 
					 
					
						
						
							
							Simplify MultiHashEmbed signature  
						
						
						
					 
					
						2020-10-05 19:57:45 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							eb9ba61517 
							
						 
					 
					
						
						
							
							Format  
						
						
						
					 
					
						2020-10-05 15:29:49 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8ec79ad3fa 
							
						 
					 
					
						
						
							
							Allow configuration of MultiHashEmbed features  
						
						... 
						
						
						
						Update arguments to MultiHashEmbed layer so that the attributes can be
controlled. A kind of tricky scheme is used to allow optional
specification of the rows. I think it's an okay balance between
flexibility and convenience. 
						
					 
					
						2020-10-05 15:22:00 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							bcd52e5486 
							
						 
					 
					
						
						
							
							Tidy up errors and warnings  
						
						
						
					 
					
						2020-10-04 11:16:31 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							3bc3c05fcc 
							
						 
					 
					
						
						
							
							Tidy up and auto-format  
						
						
						
					 
					
						2020-10-03 17:20:18 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							02247cccaf 
							
						 
					 
					
						
						
							
							Merge remote-tracking branch 'upstream/develop' into feature/small-fixes  
						
						
						
					 
					
						2020-10-02 20:48:11 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6965cdf16d 
							
						 
					 
					
						
						
							
							Fix comment  
						
						
						
					 
					
						2020-10-02 17:26:21 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							75a1569908 
							
						 
					 
					
						
						
							
							Merge  
						
						
						
					 
					
						2020-10-01 23:07:53 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							300e5a9928 
							
						 
					 
					
						
						
							
							Avoid relying on NORM in default v3 models ( #6176 )  
						
						... 
						
						
						
						* Allow CharacterEmbed to specify feature
* Default to LOWER in character embed
* Update tok2vec
* Use LOWER, not NORM 
						
					 
					
						2020-10-01 23:05:55 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b854bca15c 
							
						 
					 
					
						
						
							
							Default to LOWER in character embed  
						
						
						
					 
					
						2020-10-01 22:17:58 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							684a77870b 
							
						 
					 
					
						
						
							
							Allow CharacterEmbed to specify feature  
						
						
						
					 
					
						2020-10-01 22:17:26 +02:00 
						 
				 
			
				
					
						
							
							
								Sofie Van Landeghem 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							a22215f427 
							
						 
					 
					
						
						
							
							Add FeatureExtractor from Thinc ( #6170 )  
						
						... 
						
						
						
						* move featureextractor from Thinc
* Update website/docs/api/architectures.md
Co-authored-by: Ines Montani <ines@ines.io>
* Update website/docs/api/architectures.md
Co-authored-by: Ines Montani <ines@ines.io>
Co-authored-by: Ines Montani <ines@ines.io> 
						
					 
					
						2020-10-01 16:22:48 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							5121972930 
							
						 
					 
					
						
						
							
							add types of Tok2Vec embedding layers  
						
						
						
					 
					
						2020-10-01 09:20:09 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							5a9fdbc8ad 
							
						 
					 
					
						
						
							
							state_type as Literal  
						
						
						
					 
					
						2020-09-23 17:32:14 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							25b34bba94 
							
						 
					 
					
						
						
							
							throw custom error when state_type is invalid  
						
						
						
					 
					
						2020-09-23 16:57:14 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							dd2292793f 
							
						 
					 
					
						
						
							
							'parser' instead of 'deps' for state_type  
						
						
						
					 
					
						2020-09-23 16:53:49 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							6c85fab316 
							
						 
					 
					
						
						
							
							state_type and extra_state_tokens instead of nr_feature_tokens  
						
						
						
					 
					
						2020-09-23 13:35:09 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							1114219ae3 
							
						 
					 
					
						
						
							
							Tidy up and auto-format  
						
						
						
					 
					
						2020-09-21 10:59:07 +02:00 
						 
				 
			
				
					
						
							
							
								Adriane Boyd 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							f3db3f6fe0 
							
						 
					 
					
						
						
							
							Add vectors option to CharacterEmbed ( #6069 )  
						
						... 
						
						
						
						* Add vectors option to CharacterEmbed
* Update spacy/pipeline/morphologizer.pyx
* Adjust default morphologizer config
Co-authored-by: Matthew Honnibal <honnibal+gh@gmail.com> 
						
					 
					
						2020-09-16 17:45:04 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							1955aaaa20 
							
						 
					 
					
						
						
							
							Merge pull request  #6045  from svlandeg/feature/more-layers-docs [ci skip]  
						
						
						
					 
					
						2020-09-09 21:46:40 +02:00 
						 
				 
			
				
					
						
							
							
								Sofie Van Landeghem 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							cb66ea7400 
							
						 
					 
					
						
						
							
							Remove simple_ner code ( #6041 )  
						
						... 
						
						
						
						* remove simple_ner code
* remove unused _biluo and _iob files 
						
					 
					
						2020-09-09 16:11:27 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							39aa740777 
							
						 
					 
					
						
						
							
							Merge remote-tracking branch 'upstream/develop' into feature/more-layers-docs  
						
						
						
					 
					
						2020-09-09 11:59:34 +02:00 
						 
				 
			
				
					
						
							
							
								Sofie Van Landeghem 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							60f22e1800 
							
						 
					 
					
						
						
							
							Pipe API ( #6034 )  
						
						... 
						
						
						
						* ensure Language passes on valid examples for initialization
* fix tagger model initialization
* check for valid get_examples across components
* assume labels were added before begin_training
* fix senter initialization
* fix morphologizer initialization
* use methods to check arguments
* test textcat init, requires thinc>=8.0.0a31
* fix tok2vec init
* fix entity linker init
* use islice
* fix simple NER
* cleanup debug model
* fix assert statements
* fix tests
* throw error when adding a label if the output layer can't be resized anymore
* fix test
* add failing test for simple_ner
* UX improvements
* morphologizer UX
* assume begin_training gets a representative set and processes the labels
* remove assumptions for output of untrained NER model
* restore test for original purpose 
						
					 
					
						2020-09-08 22:44:25 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							bd8f9b188b 
							
						 
					 
					
						
						
							
							small fixes  
						
						
						
					 
					
						2020-09-08 17:24:36 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							06ef66fd73 
							
						 
					 
					
						
						
							
							Merge remote-tracking branch 'upstream/develop' into feature/more-layers-docs  
						
						
						
					 
					
						2020-09-08 10:28:42 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							c32fcdf4c9 
							
						 
					 
					
						
						
							
							fix typo  
						
						
						
					 
					
						2020-09-04 09:10:21 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							5afe6447cd 
							
						 
					 
					
						
						
							
							registry.assets -> registry.misc  
						
						
						
					 
					
						2020-09-03 17:31:14 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							160a855246 
							
						 
					 
					
						
						
							
							Format  
						
						
						
					 
					
						2020-08-23 21:15:12 +02:00 
						 
				 
			
				
					
						
							
							
								Sofie Van Landeghem 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							358cbb21e3 
							
						 
					 
					
						
						
							
							Define candidate generator in EL config ( #5876 )  
						
						... 
						
						
						
						* candidate generator as separate part of EL config
* update comment
* ent instead of str as input for candidate generation
* Span instead of str: correct type indication
* fix types
* unit test to create new candidate generator
* fix replace_pipe argument passing
* move error message, general cleanup
* add vocab back to KB constructor
* provide KB as callable from Vocab arg
* rename to kb_loader, fix KB serialization as part of the EL pipe
* fix typo
* reformatting
* cleanup
* fix comment
* fix wrongly duplicated code from merge conflict
* rename dump to to_disk
* from_disk instead of load_bulk
* update test after recent removal of set_morphology in tagger
* remove old doc 
						
					 
					
						2020-08-18 16:10:36 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							3a193eb8f1 
							
						 
					 
					
						
						
							
							Fix imports, types and default configs  
						
						
						
					 
					
						2020-08-07 18:40:54 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b1d83fc13e 
							
						 
					 
					
						
						
							
							Fix imports  
						
						
						
					 
					
						2020-08-07 16:55:54 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							473504d837 
							
						 
					 
					
						
						
							
							Format  
						
						
						
					 
					
						2020-08-07 16:49:00 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							234c52a91e 
							
						 
					 
					
						
						
							
							Add tok2vec docstrings  
						
						
						
					 
					
						2020-08-07 16:48:48 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							547bc8a82b 
							
						 
					 
					
						
						
							
							Add docstring notes  
						
						
						
					 
					
						2020-08-07 16:17:34 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							da6e59519e 
							
						 
					 
					
						
						
							
							Add docstrings for simple_ner  
						
						
						
					 
					
						2020-08-07 15:09:49 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							7ef8a64df9 
							
						 
					 
					
						
						
							
							Add docstring for parser  
						
						
						
					 
					
						2020-08-07 14:59:34 +02:00 
						 
				 
			
				
					
						
							
							
								Sofie Van Landeghem 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							82347110f5 
							
						 
					 
					
						
						
							
							Default empty KB in EL component ( #5872 )  
						
						... 
						
						
						
						* EL field documentation
* documentation consistent with docs
* default empty KB, initialize vocab separately
* formatting
* add test for changing the default entity vector length
* update comment 
						
					 
					
						2020-08-04 14:34:09 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							e9e8fa2466 
							
						 
					 
					
						
						
							
							Update docs and types  
						
						
						
					 
					
						2020-07-31 17:02:54 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							142b58be92 
							
						 
					 
					
						
						
							
							Fix import  
						
						
						
					 
					
						2020-07-29 14:45:09 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c99a653070 
							
						 
					 
					
						
						
							
							Adjust textcat model  
						
						
						
					 
					
						2020-07-29 14:38:15 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9e1b11dd81 
							
						 
					 
					
						
						
							
							Update vectors in textcat  
						
						
						
					 
					
						2020-07-29 14:35:36 +02:00