Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							7c4ab7e82c 
							
						 
					 
					
						
						
							
							Fix Lemmatizer.get_lookups_config  
						
						
						
					 
					
						2020-10-03 17:16:10 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							dd542ec6a4 
							
						 
					 
					
						
						
							
							Fix label initialization of textcat component ( #6190 )  
						
						
						
					 
					
						2020-10-03 17:07:38 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							35d695a031 
							
						 
					 
					
						
						
							
							Update docs  
						
						
						
					 
					
						2020-10-03 16:08:24 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							02247cccaf 
							
						 
					 
					
						
						
							
							Merge remote-tracking branch 'upstream/develop' into feature/small-fixes  
						
						
						
					 
					
						2020-10-02 20:48:11 +02:00 
						 
				 
			
				
					
						
							
							
								Sofie Van Landeghem 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							09dcb75076 
							
						 
					 
					
						
						
							
							small UX fix for DocBin ( #6167 )  
						
						... 
						
						
						
						* add informative warning when messing up store_user_data DocBin flags
* add informative warning when messing up store_user_data DocBin flags
* cleanup test
* rename to patterns_path 
						
					 
					
						2020-10-02 15:43:32 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							f0b30aedad 
							
						 
					 
					
						
						
							
							Make lemmatizers use initialize logic ( #6182 )  
						
						... 
						
						
						
						* Make lemmatizer use initialize logic and tidy up
* Fix typo
* Raise for uninitialized tables 
						
					 
					
						2020-10-02 15:42:36 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							df06f7a792 
							
						 
					 
					
						
						
							
							Update docs [ci skip]  
						
						
						
					 
					
						2020-10-02 13:24:33 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							d2aa662ab2 
							
						 
					 
					
						
						
							
							Merge pull request  #6179  from adrianeboyd/feature/token-morph-refactor-2 [ci skip]  
						
						
						
					 
					
						2020-10-02 12:10:27 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							32cdc1c4f4 
							
						 
					 
					
						
						
							
							Update docs [ci skip]  
						
						
						
					 
					
						2020-10-02 11:38:03 +02:00 
						 
				 
			
				
					
						
							
							
								Adriane Boyd 
							
						 
					 
					
						
						
						
						
							
						
						
							fd09e6b140 
							
						 
					 
					
						
						
							
							Update docs for Token.morph / Token.set_morph  
						
						
						
					 
					
						2020-10-02 09:05:15 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							01c1538c72 
							
						 
					 
					
						
						
							
							Integrate file readers  
						
						
						
					 
					
						2020-10-02 01:36:06 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							6b94cee468 
							
						 
					 
					
						
						
							
							Fix docs [ci skip]  
						
						
						
					 
					
						2020-10-02 01:11:19 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							f2627157c8 
							
						 
					 
					
						
						
							
							Update docs [ci skip]  
						
						
						
					 
					
						2020-10-01 17:38:17 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							1328c9fd14 
							
						 
					 
					
						
						
							
							consistently use --code instead of --code-path  
						
						
						
					 
					
						2020-10-01 16:59:22 +02:00 
						 
				 
			
				
					
						
							
							
								Sofie Van Landeghem 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							a22215f427 
							
						 
					 
					
						
						
							
							Add FeatureExtractor from Thinc ( #6170 )  
						
						... 
						
						
						
						* move featureextractor from Thinc
* Update website/docs/api/architectures.md
Co-authored-by: Ines Montani <ines@ines.io>
* Update website/docs/api/architectures.md
Co-authored-by: Ines Montani <ines@ines.io>
Co-authored-by: Ines Montani <ines@ines.io> 
						
					 
					
						2020-10-01 16:22:48 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							0a8a124a6e 
							
						 
					 
					
						
						
							
							Update docs [ci skip]  
						
						
						
					 
					
						2020-10-01 12:15:53 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							a103ab5f1a 
							
						 
					 
					
						
						
							
							Update augmenter lookups and docs  
						
						
						
					 
					
						2020-09-30 23:03:47 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							115481aca7 
							
						 
					 
					
						
						
							
							Update docs [ci skip]  
						
						
						
					 
					
						2020-09-30 15:16:00 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							9bb958fd0a 
							
						 
					 
					
						
						
							
							Fix debug data [ci skip]  
						
						
						
					 
					
						2020-09-29 23:07:11 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							604be54a5c 
							
						 
					 
					
						
						
							
							Support --code in evaluate CLI [ci skip]  
						
						
						
					 
					
						2020-09-29 21:20:56 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							d3c63b7965 
							
						 
					 
					
						
						
							
							Merge branch 'develop' into feature/prepare  
						
						
						
					 
					
						2020-09-29 20:53:05 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							361f91e286 
							
						 
					 
					
						
						
							
							Merge pull request  #6135  from walterhenry/develop-proof  
						
						
						
					 
					
						2020-09-29 20:49:06 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							b486389eec 
							
						 
					 
					
						
						
							
							Update website/docs/api/doc.md  
						
						
						
					 
					
						2020-09-29 20:48:43 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							d7469283c5 
							
						 
					 
					
						
						
							
							Update docs [ci skip]  
						
						
						
					 
					
						2020-09-29 16:59:21 +02:00 
						 
				 
			
				
					
						
							
							
								walterhenry 
							
						 
					 
					
						
						
						
						
							
						
						
							c1c841940c 
							
						 
					 
					
						
						
							
							Merge branch 'develop-proof' of  https://github.com/walterhenry/spaCy  into develop-proof  
						
						
						
					 
					
						2020-09-29 11:47:43 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							ff9a63bfbd 
							
						 
					 
					
						
						
							
							begin_training -> initialize  
						
						
						
					 
					
						2020-09-28 21:35:09 +02:00 
						 
				 
			
				
					
						
							
							
								walterhenry 
							
						 
					 
					
						
						
						
						
							
						
						
							3360825e00 
							
						 
					 
					
						
						
							
							Proofreading  
						
						... 
						
						
						
						Another round of proofreading. All the API docs have been read through and I've grazed the Usage docs. 
						
					 
					
						2020-09-28 16:50:15 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							a976da168c 
							
						 
					 
					
						
						
							
							Support data augmentation in Corpus ( #6155 )  
						
						... 
						
						
						
						* Support data augmentation in Corpus
* Note initial docs for data augmentation
* Add augmenter to quickstart
* Fix flake8
* Format
* Fix test
* Update spacy/tests/training/test_training.py
* Improve data augmentation arguments
* Update templates
* Move randomization out into caller
* Refactor
* Update spacy/training/augment.py
* Update spacy/tests/training/test_training.py
* Fix augment
* Fix test 
						
					 
					
						2020-09-28 03:03:27 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							f29d5b9b89 
							
						 
					 
					
						
						
							
							Update docs [ci skip]  
						
						
						
					 
					
						2020-09-27 18:39:38 +02:00 
						 
				 
			
				
					
						
							
							
								Sofie Van Landeghem 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							009ba14aaf 
							
						 
					 
					
						
						
							
							Fix pretraining in train script ( #6143 )  
						
						... 
						
						
						
						* update pretraining API in train CLI
* bump thinc to 8.0.0a35
* bump to 3.0.0a26
* doc fixes
* small doc fix 
						
					 
					
						2020-09-25 15:47:10 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							2aa4d65734 
							
						 
					 
					
						
						
							
							Update docs [ci skip]  
						
						
						
					 
					
						2020-09-24 20:41:09 +02:00 
						 
				 
			
				
					
						
							
							
								Adriane Boyd 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							3c062b3911 
							
						 
					 
					
						
						
							
							Add MORPH handling to Matcher ( #6107 )  
						
						... 
						
						
						
						* Add MORPH handling to Matcher
* Add `MORPH` to `Matcher` schema
* Rename `_SetMemberPredicate` to `_SetPredicate`
* Add `ISSUBSET` and `ISSUPERSET` operators to `_SetPredicate`
  * Add special handling for normalization and conversion of morph
    values into sets
  * For other attrs, `ISSUBSET` acts like `IN` and `ISSUPERSET` only
    matches for 0 or 1 values
* Update test
* Rename to IS_SUBSET and IS_SUPERSET 
						
					 
					
						2020-09-24 16:55:09 +02:00 
						 
				 
			
				
					
						
							
							
								Sofie Van Landeghem 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							c7eedd3534 
							
						 
					 
					
						
						
							
							updates to NEL functionality ( #6132 )  
						
						... 
						
						
						
						* NEL: read sentences and ents from reference
* fiddling with sent_start annotations
* add KB serialization test
* KB write additional file with strings.json
* score_links function to calculate NEL P/R/F
* formatting
* documentation 
						
					 
					
						2020-09-24 16:53:59 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							58dde293ce 
							
						 
					 
					
						
						
							
							Merge pull request  #6089  from adrianeboyd/feature/doc-ents-v3-2  
						
						
						
					 
					
						2020-09-24 14:44:42 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							74e1f192b4 
							
						 
					 
					
						
						
							
							Merge pull request  #6134  from explosion/feature/training_before_to_disk  
						
						
						
					 
					
						2020-09-24 14:44:11 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							3b58a8be2b 
							
						 
					 
					
						
						
							
							Update docs  
						
						
						
					 
					
						2020-09-24 14:32:42 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							88e54caa12 
							
						 
					 
					
						
						
							
							accuracy -> performance  
						
						
						
					 
					
						2020-09-24 14:32:35 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							b92c8aae78 
							
						 
					 
					
						
						
							
							Merge branch 'develop' into pr/6135  
						
						
						
					 
					
						2020-09-24 13:44:56 +02:00 
						 
				 
			
				
					
						
							
							
								walterhenry 
							
						 
					 
					
						
						
						
						
							
						
						
							3dd5f409ec 
							
						 
					 
					
						
						
							
							Proofreading  
						
						... 
						
						
						
						Proofread some API docs 
						
					 
					
						2020-09-24 13:15:28 +02:00 
						 
				 
			
				
					
						
							
							
								Adriane Boyd 
							
						 
					 
					
						
						
						
						
							
						
						
							1c63f02f99 
							
						 
					 
					
						
						
							
							Add API docs  
						
						
						
					 
					
						2020-09-24 12:51:16 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							138c8d45db 
							
						 
					 
					
						
						
							
							Update docs  
						
						
						
					 
					
						2020-09-24 12:43:39 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							ae51f580c1 
							
						 
					 
					
						
						
							
							Fix handling of score_weights  
						
						
						
					 
					
						2020-09-24 10:27:33 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							dd2292793f 
							
						 
					 
					
						
						
							
							'parser' instead of 'deps' for state_type  
						
						
						
					 
					
						2020-09-23 16:53:49 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							6c85fab316 
							
						 
					 
					
						
						
							
							state_type and extra_state_tokens instead of nr_feature_tokens  
						
						
						
					 
					
						2020-09-23 13:35:09 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							6ca06cb62c 
							
						 
					 
					
						
						
							
							Update docs and formatting [ci skip]  
						
						
						
					 
					
						2020-09-23 10:14:27 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							b556a10808 
							
						 
					 
					
						
						
							
							rename converts in_to_out  
						
						
						
					 
					
						2020-09-22 11:50:19 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							f9af7d365c 
							
						 
					 
					
						
						
							
							Update docs [ci skip]  
						
						
						
					 
					
						2020-09-22 09:45:41 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							49e80dbcac 
							
						 
					 
					
						
						
							
							Merge pull request  #6103  from explosion/chore/tidy-up-tests-docs-get-doc  
						
						
						
					 
					
						2020-09-22 09:45:04 +02:00 
						 
				 
			
				
					
						
							
							
								Adriane Boyd 
							
						 
					 
					
						
						
						
						
							
						
						
							5fbb8dfcbc 
							
						 
					 
					
						
						
							
							Merge remote-tracking branch 'upstream/develop' into docs/various-v3-2  
						
						
						
					 
					
						2020-09-22 09:22:58 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							67fbcb3da5 
							
						 
					 
					
						
						
							
							Tidy up tests and docs  
						
						
						
					 
					
						2020-09-21 20:43:54 +02:00 
						 
				 
			
				
					
						
							
							
								Adriane Boyd 
							
						 
					 
					
						
						
						
						
							
						
						
							f212303729 
							
						 
					 
					
						
						
							
							Add sent_starts to Doc.__init__  
						
						... 
						
						
						
						Add sent_starts to `Doc.__init__`. Officially specify `is_sent_start`
values but also convert to and accept `sent_start` internally. 
						
					 
					
						2020-09-21 17:59:09 +02:00 
						 
				 
			
				
					
						
							
							
								Adriane Boyd 
							
						 
					 
					
						
						
						
						
							
						
						
							6aa91c7ca0 
							
						 
					 
					
						
						
							
							Make user_data keyword-only  
						
						
						
					 
					
						2020-09-21 16:00:06 +02:00 
						 
				 
			
				
					
						
							
							
								Adriane Boyd 
							
						 
					 
					
						
						
						
						
							
						
						
							bc02e86494 
							
						 
					 
					
						
						
							
							Extend Doc.__init__ with additional annotation  
						
						... 
						
						
						
						Mostly copying from `spacy.tests.util.get_doc`, add additional kwargs to
`Doc.__init__` to initialize the most common doc/token values. 
						
					 
					
						2020-09-21 13:36:24 +02:00 
						 
				 
			
				
					
						
							
							
								Adriane Boyd 
							
						 
					 
					
						
						
						
						
							
						
						
							3aa57ce6c9 
							
						 
					 
					
						
						
							
							Update alignment mode in Doc.char_span docs  
						
						
						
					 
					
						2020-09-21 09:07:20 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							012b3a7096 
							
						 
					 
					
						
						
							
							Update docs [ci skip]  
						
						
						
					 
					
						2020-09-20 17:44:58 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							554c9a2497 
							
						 
					 
					
						
						
							
							Update docs [ci skip]  
						
						
						
					 
					
						2020-09-20 12:30:53 +02:00 
						 
				 
			
				
					
						
							
							
								Sofie Van Landeghem 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							39872de1f6 
							
						 
					 
					
						
						
							
							Introducing the gpu_allocator ( #6091 )  
						
						... 
						
						
						
						* rename 'use_pytorch_for_gpu_memory' to 'gpu_allocator'
* --code instead of --code-path
* update documentation
* avoid querying the "system" section directly
* add explanation of gpu_allocator to TF/PyTorch section in docs
* fix typo
* fix typo 2
* use set_gpu_allocator from thinc 8.0.0a34
* default null instead of empty string 
						
					 
					
						2020-09-19 01:17:02 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							0406200a1e 
							
						 
					 
					
						
						
							
							Update docs [ci skip]  
						
						
						
					 
					
						2020-09-18 15:13:13 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							a127fa475e 
							
						 
					 
					
						
						
							
							Merge pull request  #6078  from svlandeg/fix/corpus  
						
						
						
					 
					
						2020-09-18 14:44:21 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							d32ce121be 
							
						 
					 
					
						
						
							
							Fix docs [ci skip]  
						
						
						
					 
					
						2020-09-18 13:41:12 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							1bb8b4f824 
							
						 
					 
					
						
						
							
							Merge branch 'master' into develop  
						
						
						
					 
					
						2020-09-17 17:46:20 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							2e3ce9f42f 
							
						 
					 
					
						
						
							
							Merge branch 'feature/init-config-pretrain' of  https://github.com/svlandeg/spaCy  into pr/6084  
						
						
						
					 
					
						2020-09-17 16:58:49 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							3d8e010655 
							
						 
					 
					
						
						
							
							Change order  
						
						
						
					 
					
						2020-09-17 16:58:46 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							c4b414b282 
							
						 
					 
					
						
						
							
							Update website/docs/api/cli.md  
						
						
						
					 
					
						2020-09-17 16:58:09 +02:00 
						 
				 
			
				
					
						
							
							
								Sofie Van Landeghem 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							e5ceec5df0 
							
						 
					 
					
						
						
							
							Update website/docs/api/cli.md  
						
						... 
						
						
						
						Co-authored-by: Ines Montani <ines@ines.io> 
						
					 
					
						2020-09-17 16:56:20 +02:00 
						 
				 
			
				
					
						
							
							
								Sofie Van Landeghem 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							127ce0c574 
							
						 
					 
					
						
						
							
							Update website/docs/api/cli.md  
						
						... 
						
						
						
						Co-authored-by: Ines Montani <ines@ines.io> 
						
					 
					
						2020-09-17 16:55:53 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							5fade4feb7 
							
						 
					 
					
						
						
							
							fix cli abbrev  
						
						
						
					 
					
						2020-09-17 16:15:20 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							ddfc1fc146 
							
						 
					 
					
						
						
							
							add pretraining option to init config  
						
						
						
					 
					
						2020-09-17 16:05:40 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							130ffa5fbf 
							
						 
					 
					
						
						
							
							fix typos in docs  
						
						
						
					 
					
						2020-09-17 14:59:41 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							0c35885751 
							
						 
					 
					
						
						
							
							generalize corpora, dot notation for dev and train corpus  
						
						
						
					 
					
						2020-09-17 11:38:59 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							8cedb2f380 
							
						 
					 
					
						
						
							
							Merge branch 'fix/corpus' of  https://github.com/svlandeg/spaCy  into fix/corpus  
						
						
						
					 
					
						2020-09-17 09:27:55 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							781fae678b 
							
						 
					 
					
						
						
							
							Merge remote-tracking branch 'upstream/develop' into fix/corpus  
						
						
						
					 
					
						2020-09-17 09:24:36 +02:00 
						 
				 
			
				
					
						
							
							
								Sofie Van Landeghem 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							21dcf92964 
							
						 
					 
					
						
						
							
							Update website/docs/api/data-formats.md  
						
						... 
						
						
						
						Co-authored-by: Matthew Honnibal <honnibal+gh@gmail.com> 
						
					 
					
						2020-09-17 09:21:36 +02:00 
						 
				 
			
				
					
						
							
							
								Adriane Boyd 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							7e4cd7575c 
							
						 
					 
					
						
						
							
							Refactor Docs.is_ flags ( #6044 )  
						
						... 
						
						
						
						* Refactor Docs.is_ flags
* Add derived `Doc.has_annotation` method
  * `Doc.has_annotation(attr)` returns `True` for partial annotation
  * `Doc.has_annotation(attr, require_complete=True)` returns `True` for
    complete annotation
* Add deprecation warnings to `is_tagged`, `is_parsed`, `is_sentenced`
and `is_nered`
* Add `Doc._get_array_attrs()`, which returns a full list of `Doc` attrs
for use with `Doc.to_array`, `Doc.to_bytes` and `Doc.from_docs`. The
list is the `DocBin` attributes list plus `SPACY` and `LENGTH`.
Notes on `Doc.has_annotation`:
* `HEAD` is converted to `DEP` because heads don't have an unset state
* Accept `IS_SENT_START` as a synonym of `SENT_START`
Additional changes:
* Add `NORM`, `ENT_ID` and `SENT_START` to default attributes for
`DocBin`
* In `Doc.from_array()` the presence of `DEP` causes `HEAD` to override
`SENT_START`
* In `Doc.from_array()` using `attrs` other than
`Doc._get_array_attrs()` (i.e., a user's custom list rather than our
default internal list) with both `HEAD` and `SENT_START` shows a warning
that `HEAD` will override `SENT_START`
* `set_children_from_heads` does not require dependency labels to set
sentence boundaries and sets `sent_start` for all non-sentence starts to
`-1`
* Fix call to set_children_from_heads
Co-authored-by: Matthew Honnibal <honnibal+gh@gmail.com> 
						
					 
					
						2020-09-17 00:14:01 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							55f8d5478e 
							
						 
					 
					
						
						
							
							fix example output  
						
						
						
					 
					
						2020-09-15 22:09:30 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							51fa929f47 
							
						 
					 
					
						
						
							
							rewrite train_corpus to corpus.train in config  
						
						
						
					 
					
						2020-09-15 21:58:04 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							0edd695bf6 
							
						 
					 
					
						
						
							
							Update docs  
						
						
						
					 
					
						2020-09-15 11:41:49 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							99549a5ace 
							
						 
					 
					
						
						
							
							Fix consistency and update docs  
						
						
						
					 
					
						2020-09-15 11:37:37 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							154752f9c2 
							
						 
					 
					
						
						
							
							Update docs and consistency [ci skip]  
						
						
						
					 
					
						2020-09-15 00:32:49 +02:00 
						 
				 
			
				
					
						
							
							
								Sofie Van Landeghem 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							3216a33149 
							
						 
					 
					
						
						
							
							positive_label config for textcat ( #6062 )  
						
						... 
						
						
						
						* hook up positive_label in textcat
* unit tests
* documentation
* formatting
* tests
* fix typo
* move verify_config to after begin_training
* revert accidental commit 
						
					 
					
						2020-09-14 17:08:00 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							9afb1d9965 
							
						 
					 
					
						
						
							
							Merge pull request  #6063  from svlandeg/feature/doc_cleanup [ci skip]  
						
						
						
					 
					
						2020-09-14 10:35:43 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							47acb45850 
							
						 
					 
					
						
						
							
							Update docs [ci skip]  
						
						
						
					 
					
						2020-09-13 22:30:33 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							2e3d067a7b 
							
						 
					 
					
						
						
							
							Update docs [ci skip]  
						
						
						
					 
					
						2020-09-13 19:29:06 +02:00 
						 
				 
			
				
					
						
							
							
								Sofie Van Landeghem 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							744df9814a 
							
						 
					 
					
						
						
							
							define threshold for scoring textcat in TextCat config ( #6055 )  
						
						... 
						
						
						
						* define threshold for scoring textcat in TextCat config
* fix unit test and documentation 
						
					 
					
						2020-09-13 14:15:52 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							c4f324d5f1 
							
						 
					 
					
						
						
							
							doc fixes  
						
						
						
					 
					
						2020-09-12 17:38:54 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							8b0dabe987 
							
						 
					 
					
						
						
							
							Update docs [ci skip]  
						
						
						
					 
					
						2020-09-12 17:05:10 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							0b2e07215d 
							
						 
					 
					
						
						
							
							Support overwriting name on spacy package  
						
						
						
					 
					
						2020-09-11 11:38:28 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							97d99f7efa 
							
						 
					 
					
						
						
							
							Merge remote-tracking branch 'upstream/develop' into feature/doc-fixes  
						
						
						
					 
					
						2020-09-10 11:51:34 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							15bc3a37b4 
							
						 
					 
					
						
						
							
							Add --branch to project clone  
						
						
						
					 
					
						2020-09-10 11:08:15 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							b7afd09d27 
							
						 
					 
					
						
						
							
							Update formatting [ci skip]  
						
						
						
					 
					
						2020-09-10 11:07:09 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							9073d99fc9 
							
						 
					 
					
						
						
							
							fix link to shape inference section  
						
						
						
					 
					
						2020-09-10 10:22:59 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							1955aaaa20 
							
						 
					 
					
						
						
							
							Merge pull request  #6045  from svlandeg/feature/more-layers-docs [ci skip]  
						
						
						
					 
					
						2020-09-09 21:46:40 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							2e567a47c2 
							
						 
					 
					
						
						
							
							Update docs and formatting  
						
						
						
					 
					
						2020-09-09 21:26:10 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							c89e07927e 
							
						 
					 
					
						
						
							
							document individual component API pages  
						
						
						
					 
					
						2020-09-09 16:18:38 +02:00 
						 
				 
			
				
					
						
							
							
								Sofie Van Landeghem 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							cb66ea7400 
							
						 
					 
					
						
						
							
							Remove simple_ner code ( #6041 )  
						
						... 
						
						
						
						* remove simple_ner code
* remove unused _biluo and _iob files 
						
					 
					
						2020-09-09 16:11:27 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							a8aa9a8068 
							
						 
					 
					
						
						
							
							document Pipe API details, cross-references, etc.  
						
						
						
					 
					
						2020-09-09 15:56:27 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							39aa740777 
							
						 
					 
					
						
						
							
							Merge remote-tracking branch 'upstream/develop' into feature/more-layers-docs  
						
						
						
					 
					
						2020-09-09 11:59:34 +02:00 
						 
				 
			
				
					
						
							
							
								Sofie Van Landeghem 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							8e7557656f 
							
						 
					 
					
						
						
							
							Renaming gold & annotation_setter ( #6042 )  
						
						... 
						
						
						
						* version bump to 3.0.0a16
* rename "gold" folder to "training"
* rename 'annotation_setter' to 'set_extra_annotations'
* formatting 
						
					 
					
						2020-09-09 10:31:03 +02:00 
						 
				 
			
				
					
						
							
							
								Marek Grzenkowicz 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							a26f864ed3 
							
						 
					 
					
						
						
							
							Clarify how to choose pretrained weights files ( closes   #6027 ) [ci skip] ( #6039 )  
						
						
						
					 
					
						2020-09-08 21:13:50 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							bd8f9b188b 
							
						 
					 
					
						
						
							
							small fixes  
						
						
						
					 
					
						2020-09-08 17:24:36 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							157caf4dfa 
							
						 
					 
					
						
						
							
							WIP: update docs [ci skip]  
						
						
						
					 
					
						2020-09-04 16:30:31 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							f174c7b1f3 
							
						 
					 
					
						
						
							
							Merge branch 'develop' into pr/6018  
						
						
						
					 
					
						2020-09-04 15:54:49 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							864a697e63 
							
						 
					 
					
						
						
							
							Merge branch 'develop' into master-tmp  
						
						
						
					 
					
						2020-09-04 13:15:36 +02:00 
						 
				 
			
				
					
						
							
							
								Adriane Boyd 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							b927893309 
							
						 
					 
					
						
						
							
							Merge branch 'develop' into feature/dependency-matcher-v3  
						
						
						
					 
					
						2020-09-04 13:03:30 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							4daf138136 
							
						 
					 
					
						
						
							
							Fix alphabetic ordering [ci skip]  
						
						
						
					 
					
						2020-09-03 23:01:50 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							23b7d9cfa3 
							
						 
					 
					
						
						
							
							Prefix span getters  
						
						
						
					 
					
						2020-09-03 17:37:06 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							5afe6447cd 
							
						 
					 
					
						
						
							
							registry.assets -> registry.misc  
						
						
						
					 
					
						2020-09-03 17:31:14 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							c063e55eb7 
							
						 
					 
					
						
						
							
							Add prefix to batchers  
						
						
						
					 
					
						2020-09-03 17:30:41 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							804f120361 
							
						 
					 
					
						
						
							
							Don't use registered function version in title  
						
						
						
					 
					
						2020-09-03 17:29:47 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							c53b1433b9 
							
						 
					 
					
						
						
							
							Adjust more arguments [ci skip]  
						
						
						
					 
					
						2020-09-03 17:12:24 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							25a595dc10 
							
						 
					 
					
						
						
							
							Fix typos and wording [ci skip]  
						
						
						
					 
					
						2020-09-03 16:37:45 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							b5a0657fd6 
							
						 
					 
					
						
						
							
							"model" terminology consistency in docs  
						
						
						
					 
					
						2020-09-03 13:13:03 +02:00 
						 
				 
			
				
					
						
							
							
								Adriane Boyd 
							
						 
					 
					
						
						
						
						
							
						
						
							960d9cfadc 
							
						 
					 
					
						
						
							
							Officially support DependencyMatcher  
						
						... 
						
						
						
						Add official support for the `DependencyMatcher`. Redesign the pattern
specification. Fix and extend operator implementations. Update API docs
and add usage docs.
Patterns
--------
Refactor pattern structure to:
```
{
  "LEFT_ID": str,
  "REL_OP": str,
  "RIGHT_ID": str,
  "RIGHT_ATTRS": dict,
}
```
The first node contains only `RIGHT_ID` and `RIGHT_ATTRS` and all
subsequent nodes contain all four keys.
New operators
-------------
Because of the way patterns are constructed from left to right, it's
helpful to have `follows` operators along with `precedes` operators. Add
operators for simple precedes / follows alongside immediate precedes /
follows.
* `.*`: precedes
* `;`: immediately follows
* `;*`: follows
Operator fixes
--------------
* `<` and `<<` do not include the node itself
* Fix reversed order for all operators involving linear precedence (`.`,
  all sibling operators)
* Linear precedence operators do not match nodes outside the same parse
Additional fixes
----------------
* Use v3 Matcher API
* Support `get` and `remove`
* Support pickling 
						
					 
					
						2020-09-02 17:45:29 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							bbaea530f6 
							
						 
					 
					
						
						
							
							sublayers paragraph  
						
						
						
					 
					
						2020-09-02 17:36:22 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							9af82f3f11 
							
						 
					 
					
						
						
							
							Merge pull request  #6003  from explosion/feature/matcher-as-spans  
						
						
						
					 
					
						2020-08-31 17:50:56 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							3929431af1 
							
						 
					 
					
						
						
							
							Update docs [ci skip]  
						
						
						
					 
					
						2020-08-31 17:06:33 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							add9de5487 
							
						 
					 
					
						
						
							
							Deprecate (Phrase)Matcher.pipe  
						
						
						
					 
					
						2020-08-31 17:01:24 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							2c3b64a567 
							
						 
					 
					
						
						
							
							console logging example  
						
						
						
					 
					
						2020-08-31 16:56:13 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							bca6bf8dda 
							
						 
					 
					
						
						
							
							Update docs [ci skip]  
						
						
						
					 
					
						2020-08-31 16:39:53 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							db9f8896f5 
							
						 
					 
					
						
						
							
							Add docs [ci skip]  
						
						
						
					 
					
						2020-08-31 16:10:41 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							fe6c08218e 
							
						 
					 
					
						
						
							
							fixes  
						
						
						
					 
					
						2020-08-31 14:51:49 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							0e0abb0378 
							
						 
					 
					
						
						
							
							fix  
						
						
						
					 
					
						2020-08-31 14:50:29 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							56ba691ecd 
							
						 
					 
					
						
						
							
							small fixes  
						
						
						
					 
					
						2020-08-31 14:46:00 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							e47ea88aeb 
							
						 
					 
					
						
						
							
							revert annotations refactor  
						
						
						
					 
					
						2020-08-31 14:40:55 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							2c90a06fee 
							
						 
					 
					
						
						
							
							some more information about the loggers  
						
						
						
					 
					
						2020-08-31 13:43:17 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							c18eb63483 
							
						 
					 
					
						
						
							
							Merge remote-tracking branch 'upstream/develop' into feature/vectors-docs  
						
						... 
						
						
						
						# Conflicts:
#	website/docs/usage/embeddings-transformers.md 
						
					 
					
						2020-08-31 13:21:36 +02:00 
						 
				 
			
				
					
						
							
							
								Sofie Van Landeghem 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							ec14744ee4 
							
						 
					 
					
						
						
							
							Rename Transformer listener ( #6001 )  
						
						... 
						
						
						
						* rename to spacy-transformers.TransformerListener
* add some more tok2vec tests
* use select_pipes
* fix docs - annotation setter was not changed in the end 
						
					 
					
						2020-08-31 12:41:39 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							9b86312bab 
							
						 
					 
					
						
						
							
							Update docs [ci skip]  
						
						
						
					 
					
						2020-08-29 18:43:19 +02:00 
						 
				 
			
				
					
						
							
							
								Adriane Boyd 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							870774f475 
							
						 
					 
					
						
						
							
							Merge branch 'develop' into docs/morph-usage-v3  
						
						
						
					 
					
						2020-08-29 16:00:50 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							45f46a5c85 
							
						 
					 
					
						
						
							
							Merge pull request  #5993  from explosion/feature/disabled-components  
						
						
						
					 
					
						2020-08-29 15:58:41 +02:00 
						 
				 
			
				
					
						
							
							
								Adriane Boyd 
							
						 
					 
					
						
						
						
						
							
						
						
							f9ed31a757 
							
						 
					 
					
						
						
							
							Update usage docs for lemmatization and morphology  
						
						
						
					 
					
						2020-08-29 15:56:50 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							450bf806b0 
							
						 
					 
					
						
						
							
							Merge pull request  #5991  from adrianeboyd/docs/sent-usage-v3  
						
						... 
						
						
						
						Update sentence segmentation usage docs 
						
					 
					
						2020-08-29 12:40:06 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							66d76f5126 
							
						 
					 
					
						
						
							
							Update docs  
						
						
						
					 
					
						2020-08-29 12:36:05 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							5230529de2 
							
						 
					 
					
						
						
							
							add loggers registry & logger docs sections  
						
						
						
					 
					
						2020-08-28 21:44:04 +02:00 
						 
				 
			
				
					
						
							
							
								Adriane Boyd 
							
						 
					 
					
						
						
						
						
							
						
						
							48df50533d 
							
						 
					 
					
						
						
							
							Update sentence segmentation usage docs  
						
						... 
						
						
						
						Update sentence segmentation usage docs to incorporate `senter`. 
						
					 
					
						2020-08-28 10:58:16 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							72a87095d9 
							
						 
					 
					
						
						
							
							add loggers registry  
						
						
						
					 
					
						2020-08-27 20:26:28 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							aa9e0c9c39 
							
						 
					 
					
						
						
							
							small fix  
						
						
						
					 
					
						2020-08-27 19:56:52 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							8cde6ccb7d 
							
						 
					 
					
						
						
							
							Merge remote-tracking branch 'upstream/develop' into feature/vectors-docs  
						
						
						
					 
					
						2020-08-27 19:56:09 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							556e975a30 
							
						 
					 
					
						
						
							
							various fixes  
						
						
						
					 
					
						2020-08-27 19:24:44 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							ff4175e839 
							
						 
					 
					
						
						
							
							Add more info to debug config  
						
						
						
					 
					
						2020-08-27 18:17:58 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							559b65f2e0 
							
						 
					 
					
						
						
							
							adjust references to null_annotation_setter to trfdata_setter  
						
						
						
					 
					
						2020-08-27 09:43:32 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							696f167478 
							
						 
					 
					
						
						
							
							Add diff example to docs [ci skip]  
						
						
						
					 
					
						2020-08-26 15:57:54 +02:00 
						 
				 
			
				
					
						
							
							
								Adriane Boyd 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							90d88729e0 
							
						 
					 
					
						
						
							
							Add AttributeRuler.score ( #5963 )  
						
						... 
						
						
						
						* Add AttributeRuler.score
Add scoring for TAG / POS / MORPH / LEMMA if these are present in the
assigned token attributes.
Add default score weights (that don't really make a lot of sense) so
that the scores are in the default config in some form.
* Update docs 
						
					 
					
						2020-08-26 15:39:30 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							ec069627fe 
							
						 
					 
					
						
						
							
							rename to TransformerListener  
						
						
						
					 
					
						2020-08-26 13:31:01 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							627617a079 
							
						 
					 
					
						
						
							
							Tidy up and add docs [ci skip]  
						
						
						
					 
					
						2020-08-26 13:24:55 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							15902c5aa2 
							
						 
					 
					
						
						
							
							fix link  
						
						
						
					 
					
						2020-08-26 11:51:57 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							feb86d5206 
							
						 
					 
					
						
						
							
							clarify default  
						
						
						
					 
					
						2020-08-26 11:21:30 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							8ac5ef1284 
							
						 
					 
					
						
						
							
							Update docs  
						
						
						
					 
					
						2020-08-25 11:54:37 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							e559867605 
							
						 
					 
					
						
						
							
							Allow spacy project to push and pull to/from remote storage ( #5949 )  
						
						... 
						
						
						
						* Add utils for working with remote storage
* WIP add remote_cache for project
* WIP add push and pull commands
* Use pathy in remote_cache
* Update util
* Update remote_cache
* Update util
* Update project assets
* Update pull script
* Update push script
* Fix type annotation in util
* Work on remote storage
* Remove site and env hash
* Fix imports
* Fix type annotation
* Require pathy
* Require pathy
* Fix import
* Add a util to handle project variable substitution
* Import push and pull commands
* Fix pull command
* Fix push command
* Fix tarfile in remote_storage
* Improve printing
* Fiddle with status messages
* Set version to v3.0.0a9
* Draft docs for spacy project remote storages
* Update docs [ci skip]
* Use Thinc config to simplify and unify template variables
* Auto-format
* Don't import Pathy globally for now
Causes slow and annoying Google Cloud warning
* Tidy up test
* Tidy up and update tests
* Update to latest Thinc
* Update docs
* variables -> vars
* Update docs [ci skip]
* Update docs [ci skip]
Co-authored-by: Ines Montani <ines@ines.io> 
						
					 
					
						2020-08-23 18:32:09 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							c7c9b0451f 
							
						 
					 
					
						
						
							
							Update docs [ci skip]  
						
						
						
					 
					
						2020-08-22 13:52:52 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							71aeae89c5 
							
						 
					 
					
						
						
							
							Merge pull request  #5948  from svlandeg/feature/docs-docs-docs [ci skip]  
						
						
						
					 
					
						2020-08-22 12:18:47 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							f102164a1f 
							
						 
					 
					
						
						
							
							Update docs [ci skip]  
						
						
						
					 
					
						2020-08-21 19:34:06 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							1b7cfa7347 
							
						 
					 
					
						
						
							
							Merge remote-tracking branch 'upstream/develop' into feature/docs-docs-docs  
						
						
						
					 
					
						2020-08-21 18:36:18 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							dc98f69b57 
							
						 
					 
					
						
						
							
							alphabetize registries  
						
						
						
					 
					
						2020-08-21 18:10:21 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							518a1f97f3 
							
						 
					 
					
						
						
							
							remove outdated TODOs  
						
						
						
					 
					
						2020-08-21 17:55:15 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							e92bd6e1c1 
							
						 
					 
					
						
						
							
							alphabetize training lists  
						
						
						
					 
					
						2020-08-21 17:42:19 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							74cb6d39d0 
							
						 
					 
					
						
						
							
							Update docs [ci skip]  
						
						
						
					 
					
						2020-08-21 16:11:38 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f5bcc10268 
							
						 
					 
					
						
						
							
							Update architectures  
						
						
						
					 
					
						2020-08-21 15:34:54 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							7ed8f4504b 
							
						 
					 
					
						
						
							
							Update API docs for architectures  
						
						
						
					 
					
						2020-08-21 15:22:19 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							52bd3a8b48 
							
						 
					 
					
						
						
							
							Update docs [ci skip]  
						
						
						
					 
					
						2020-08-21 13:22:59 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							e60442d83a 
							
						 
					 
					
						
						
							
							Adjust label casing in displaCy NER visualizer ( resolves   #4866 )  
						
						... 
						
						
						
						- Accept any case for label names in ents and colors option, even if actual predicted label uses different casing
- Don't text-transform: uppercase visually, if it's important to users that the label is represented as-is in the UI 
						
					 
					
						2020-08-21 11:51:31 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							04e4d59235 
							
						 
					 
					
						
						
							
							Update docs [ci skip]  
						
						
						
					 
					
						2020-08-20 16:17:25 +02:00 
						 
				 
			
				
					
						
							
							
								Sofie Van Landeghem 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							410b54e10e 
							
						 
					 
					
						
						
							
							Update website/docs/api/data-formats.md  
						
						... 
						
						
						
						Co-authored-by: Ines Montani <ines@ines.io> 
						
					 
					
						2020-08-20 11:15:34 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							ae719b354f 
							
						 
					 
					
						
						
							
							fix typos  
						
						
						
					 
					
						2020-08-20 10:20:40 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							f728c00cbb 
							
						 
					 
					
						
						
							
							Merge remote-tracking branch 'upstream/develop' into feature/update-more-docs  
						
						... 
						
						
						
						# Conflicts:
#	website/docs/api/data-formats.md 
						
					 
					
						2020-08-20 10:02:13 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							229033831a 
							
						 
					 
					
						
						
							
							add explanation of raw_text  
						
						
						
					 
					
						2020-08-20 10:00:45 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							ea6640ea72 
							
						 
					 
					
						
						
							
							Merge pull request  #5939  from explosion/feature/thinc-v8.0.0a28  
						
						... 
						
						
						
						Update Thinc and config variables 
						
					 
					
						2020-08-19 21:14:36 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							09f3cfc985 
							
						 
					 
					
						
						
							
							add version  
						
						
						
					 
					
						2020-08-19 19:58:45 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							7d9f00bdbf 
							
						 
					 
					
						
						
							
							waltzing schedule  
						
						
						
					 
					
						2020-08-19 19:53:00 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							3dd390b1a1 
							
						 
					 
					
						
						
							
							Update Thinc and config variables  
						
						
						
					 
					
						2020-08-19 19:46:12 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							85b39639e1 
							
						 
					 
					
						
						
							
							small fix  
						
						
						
					 
					
						2020-08-19 19:17:36 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							169b5bcda0 
							
						 
					 
					
						
						
							
							Merge remote-tracking branch 'upstream/develop' into feature/update-docs  
						
						... 
						
						
						
						# Conflicts:
#	website/docs/usage/training.md 
						
					 
					
						2020-08-19 17:58:25 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							7119295a8a 
							
						 
					 
					
						
						
							
							badgers intro  
						
						
						
					 
					
						2020-08-19 17:53:22 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							648499157a 
							
						 
					 
					
						
						
							
							rename "custom models" to "custom functions"  
						
						
						
					 
					
						2020-08-19 16:53:51 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							63921161c8 
							
						 
					 
					
						
						
							
							Update docs [ci skip]  
						
						
						
					 
					
						2020-08-19 16:04:21 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							60fedb8518 
							
						 
					 
					
						
						
							
							fix 2 more API lines  
						
						
						
					 
					
						2020-08-19 14:55:32 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							2dfd919585 
							
						 
					 
					
						
						
							
							add kb_loader and get_candidates back to EL API  
						
						
						
					 
					
						2020-08-19 14:52:49 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							225f8866a1 
							
						 
					 
					
						
						
							
							Fix consistency  
						
						
						
					 
					
						2020-08-19 12:47:57 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							2285e59765 
							
						 
					 
					
						
						
							
							Merge pull request  #5933  from svlandeg/feature/more-v3-docs [ci skip]  
						
						
						
					 
					
						2020-08-19 11:29:02 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							13291e97ba 
							
						 
					 
					
						
						
							
							Update docs [ci skip]  
						
						
						
					 
					
						2020-08-19 00:28:37 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							0d55b6ebb4 
							
						 
					 
					
						
						
							
							formatting  
						
						
						
					 
					
						2020-08-18 18:55:56 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							abba639565 
							
						 
					 
					
						
						
							
							Merge remote-tracking branch 'upstream/develop' into feature/more-v3-docs  
						
						
						
					 
					
						2020-08-18 18:55:12 +02:00 
						 
				 
			
				
					
						
							
							
								Sofie Van Landeghem 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							358cbb21e3 
							
						 
					 
					
						
						
							
							Define candidate generator in EL config ( #5876 )  
						
						... 
						
						
						
						* candidate generator as separate part of EL config
* update comment
* ent instead of str as input for candidate generation
* Span instead of str: correct type indication
* fix types
* unit test to create new candidate generator
* fix replace_pipe argument passing
* move error message, general cleanup
* add vocab back to KB constructor
* provide KB as callable from Vocab arg
* rename to kb_loader, fix KB serialization as part of the EL pipe
* fix typo
* reformatting
* cleanup
* fix comment
* fix wrongly duplicated code from merge conflict
* rename dump to to_disk
* from_disk instead of load_bulk
* update test after recent removal of set_morphology in tagger
* remove old doc 
						
					 
					
						2020-08-18 16:10:36 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							82f0e20318 
							
						 
					 
					
						
						
							
							Update docs and consistency [ci skip]  
						
						
						
					 
					
						2020-08-18 14:39:40 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							705e1cb06c 
							
						 
					 
					
						
						
							
							typo in link  
						
						
						
					 
					
						2020-08-18 12:04:05 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							f7b76d2d83 
							
						 
					 
					
						
						
							
							Merge remote-tracking branch 'upstream/develop' into feature/more-v3-docs  
						
						
						
					 
					
						2020-08-18 11:57:52 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							1c3bcfb488 
							
						 
					 
					
						
						
							
							Update docs and util consistency  
						
						
						
					 
					
						2020-08-18 01:22:59 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							728fec0194 
							
						 
					 
					
						
						
							
							Update docs [ci skip]  
						
						
						
					 
					
						2020-08-18 00:49:19 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							990c6b4c32 
							
						 
					 
					
						
						
							
							Update docs and CLI [ci skip]  
						
						
						
					 
					
						2020-08-17 21:38:20 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							4fe4bab1c9 
							
						 
					 
					
						
						
							
							typo fixes  
						
						
						
					 
					
						2020-08-17 17:10:15 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							da80c18660 
							
						 
					 
					
						
						
							
							merge develop into branch  
						
						
						
					 
					
						2020-08-17 16:57:18 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							3ae5e02f4f 
							
						 
					 
					
						
						
							
							Update docs, types and API consistency  
						
						
						
					 
					
						2020-08-17 16:45:24 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							319692aa53 
							
						 
					 
					
						
						
							
							fix typos  
						
						
						
					 
					
						2020-08-17 14:05:48 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							2ac4b0ef3e 
							
						 
					 
					
						
						
							
							Finish Transformer docs [ci skip]  
						
						
						
					 
					
						2020-08-16 15:56:32 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							6ae83bde0c 
							
						 
					 
					
						
						
							
							Fix CLI consistency [ci skip]  
						
						
						
					 
					
						2020-08-16 15:46:29 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							a570c304df 
							
						 
					 
					
						
						
							
							Update quickstart, template and docs  
						
						
						
					 
					
						2020-08-15 14:50:29 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							950832f087 
							
						 
					 
					
						
						
							
							Tidy up pipes ( #5906 )  
						
						... 
						
						
						
						* Tidy up pipes
* Fix init, defaults and raise custom errors
* Update docs
* Update docs [ci skip]
* Apply suggestions from code review
Co-authored-by: Matthew Honnibal <honnibal+gh@gmail.com>
* Tidy up error handling and validation, fix consistency
* Simplify get_examples check
* Remove unused import [ci skip]
Co-authored-by: Matthew Honnibal <honnibal+gh@gmail.com> 
						
					 
					
						2020-08-11 23:29:31 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							b7ec06e331 
							
						 
					 
					
						
						
							
							Update docs [ci skip]  
						
						
						
					 
					
						2020-08-11 20:57:23 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							023ba7ae26 
							
						 
					 
					
						
						
							
							Update docs  
						
						
						
					 
					
						2020-08-10 17:13:11 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							c099f6eece 
							
						 
					 
					
						
						
							
							Add Token.lex  
						
						
						
					 
					
						2020-08-10 16:43:52 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							64f2f84098 
							
						 
					 
					
						
						
							
							Update docstrings and docs [ci skip]  
						
						
						
					 
					
						2020-08-10 13:45:22 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							12052bd8f6 
							
						 
					 
					
						
						
							
							Update docs [ci skip]  
						
						
						
					 
					
						2020-08-10 01:20:10 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							0832cdd443 
							
						 
					 
					
						
						
							
							Fix formatting [ci skip]  
						
						
						
					 
					
						2020-08-10 00:46:32 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							d611cbef43 
							
						 
					 
					
						
						
							
							Update docs [ci skip]  
						
						
						
					 
					
						2020-08-10 00:42:26 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							c044460823 
							
						 
					 
					
						
						
							
							Update docs [ci skip]  
						
						
						
					 
					
						2020-08-10 00:01:38 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							d5c78c7a34 
							
						 
					 
					
						
						
							
							Update docs and fix consistency  
						
						
						
					 
					
						2020-08-09 22:31:52 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							a15c5fb191 
							
						 
					 
					
						
						
							
							Update docstrings and docs  
						
						
						
					 
					
						2020-08-09 16:10:48 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							46bc513a4e 
							
						 
					 
					
						
						
							
							Update docs [ci skip]  
						
						
						
					 
					
						2020-08-07 20:14:31 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							fe29ceec9e 
							
						 
					 
					
						
						
							
							Merge branch 'develop' into docs/model-docstrings  
						
						
						
					 
					
						2020-08-07 18:42:01 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							470b6f8073 
							
						 
					 
					
						
						
							
							Update docs  
						
						
						
					 
					
						2020-08-07 18:41:15 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							b7e34c1451 
							
						 
					 
					
						
						
							
							Update docs [ci skip]  
						
						
						
					 
					
						2020-08-07 16:13:13 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							6f3649923c 
							
						 
					 
					
						
						
							
							Merge pull request  #5893  from explosion/feature/validate-arg  
						
						
						
					 
					
						2020-08-07 15:47:20 +02:00 
						 
				 
			
				
					
						
							
							
								Adriane Boyd 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							e962784531 
							
						 
					 
					
						
						
							
							Add Lemmatizer and simplify related components ( #5848 )  
						
						... 
						
						
						
						* Add Lemmatizer and simplify related components
* Add `Lemmatizer` pipe with `lookup` and `rule` modes using the
`Lookups` tables.
* Reduce `Tagger` to a simple tagger that sets `Token.tag` (no pos or lemma)
* Reduce `Morphology` to only keep track of morph tags (no tag map, lemmatizer,
or morph rules)
* Remove lemmatizer from `Vocab`
* Adjust many many tests
Differences:
* No default lookup lemmas
* No special treatment of TAG in `from_array` and similar required
* Easier to modify labels in a `Tagger`
* No extra strings added from morphology / tag map
* Fix test
* Initial fix for Lemmatizer config/serialization
* Adjust init test to be more generic
* Adjust init test to force empty Lookups
* Add simple cache to rule-based lemmatizer
* Convert language-specific lemmatizers
Convert language-specific lemmatizers to component lemmatizers. Remove
previous lemmatizer class.
* Fix French and Polish lemmatizers
* Remove outdated UPOS conversions
* Update Russian lemmatizer init in tests
* Add minimal init/run tests for custom lemmatizers
* Add option to overwrite existing lemmas
* Update mode setting, lookup loading, and caching
* Make `mode` an immutable property
* Only enforce strict `load_lookups` for known supported modes
* Move caching into individual `_lemmatize` methods
* Implement strict when lang is not found in lookups
* Fix tables/lookups in make_lemmatizer
* Reallow provided lookups and allow for stricter checks
* Add lookups asset to all Lemmatizer pipe tests
* Rename lookups in lemmatizer init test
* Clean up merge
* Refactor lookup table loading
* Add helper from `load_lemmatizer_lookups` that loads required and
optional lookups tables based on settings provided by a config.
Additional slight refactor of lookups:
* Add `Lookups.set_table` to set a table from a provided `Table`
* Reorder class definitions to be able to specify type as `Table`
* Move registry assets into test methods
* Refactor lookups tables config
Use class methods within `Lemmatizer` to provide the config for
particular modes and to load the lookups from a config.
* Add pipe and score to lemmatizer
* Simplify Tagger.score
* Add missing import
* Clean up imports and auto-format
* Remove unused kwarg
* Tidy up and auto-format
* Update docstrings for Lemmatizer
Update docstrings for Lemmatizer.
Additionally modify `is_base_form` API to take `Token` instead of
individual features.
* Update docstrings
* Remove tag map values from Tagger.add_label
* Update API docs
* Fix relative link in Lemmatizer API docs 
						
					 
					
						2020-08-07 15:27:13 +02:00 
						 
				 
			
				
					
						
							
							
								Adriane Boyd 
							
						 
					 
					
						
						
						
						
							
						
						
							4aecccf153 
							
						 
					 
					
						
						
							
							Update API docs for AttributeRuler.__init__  
						
						
						
					 
					
						2020-08-07 15:17:25 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							a8404c3517 
							
						 
					 
					
						
						
							
							validation -> validate  
						
						
						
					 
					
						2020-08-07 14:43:47 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							1d01d89b79 
							
						 
					 
					
						
						
							
							Update CLI docs and evaluate command [ci skip]  
						
						
						
					 
					
						2020-08-07 14:40:58 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							ef2c67cca5 
							
						 
					 
					
						
						
							
							Add DocBin to/from_disk methods and update docs ( #5892 )  
						
						... 
						
						
						
						* Add DocBin to/from_disk methods and update docs
* Use DocBin.from_disk in Corpus 
						
					 
					
						2020-08-07 14:30:59 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							4ca08c6d5d 
							
						 
					 
					
						
						
							
							Merge pull request  #5891  from adrianeboyd/docs/attribute-ruler-api  
						
						... 
						
						
						
						Add AttributeRuler API docs 
						
					 
					
						2020-08-07 13:55:12 +02:00 
						 
				 
			
				
					
						
							
							
								Adriane Boyd 
							
						 
					 
					
						
						
						
						
							
						
						
							b8d0c23857 
							
						 
					 
					
						
						
							
							Add AttributeRuler API docs  
						
						... 
						
						
						
						With additional minor updates to AttributeRuler docstrings. 
						
					 
					
						2020-08-07 12:43:23 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							824f4b2107 
							
						 
					 
					
						
						
							
							casing consistent  
						
						
						
					 
					
						2020-08-06 23:20:13 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							b17db0e994 
							
						 
					 
					
						
						
							
							Merge remote-tracking branch 'upstream/develop' into feature/el-docs  
						
						... 
						
						
						
						# Conflicts:
#	website/docs/usage/training.md 
						
					 
					
						2020-08-06 19:48:52 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							49ddeb99ea 
							
						 
					 
					
						
						
							
							add textcat architectures documentation  
						
						
						
					 
					
						2020-08-06 19:44:47 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							e5995904d6 
							
						 
					 
					
						
						
							
							Update docs  
						
						
						
					 
					
						2020-08-06 19:30:43 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							e8fd0c1f1e 
							
						 
					 
					
						
						
							
							EL architectures documentation  
						
						
						
					 
					
						2020-08-06 17:41:26 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							f396f091dc 
							
						 
					 
					
						
						
							
							update EL API  
						
						
						
					 
					
						2020-08-06 16:40:48 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							81d0b1c390 
							
						 
					 
					
						
						
							
							update EL pipe arguments  
						
						
						
					 
					
						2020-08-06 16:22:50 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							0b4d1e1bc4 
							
						 
					 
					
						
						
							
							'debug data' instead of 'debug-data'  
						
						
						
					 
					
						2020-08-06 15:47:31 +02:00 
						 
				 
			
				
					
						
							
							
								svlandeg 
							
						 
					 
					
						
						
						
						
							
						
						
							881e3f8fd0 
							
						 
					 
					
						
						
							
							add docbin explanation and example  
						
						
						
					 
					
						2020-08-06 15:29:44 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							5d417d3b19 
							
						 
					 
					
						
						
							
							WIP: Update docs [ci skip]  
						
						
						
					 
					
						2020-08-06 13:10:15 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							06e80d95cd 
							
						 
					 
					
						
						
							
							Sync develop with nightly docs state ( #5883 )  
						
						... 
						
						
						
						Co-authored-by: svlandeg <sofie.vanlandeghem@gmail.com> 
						
					 
					
						2020-08-06 00:28:14 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							5cc0d89fad 
							
						 
					 
					
						
						
							
							Simplify config overrides in CLI and deserialization ( #5880 )  
						
						
						
					 
					
						2020-08-05 23:35:09 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							50311a4d37 
							
						 
					 
					
						
						
							
							Update docs [ci skip]  
						
						
						
					 
					
						2020-08-05 20:29:53 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							2a4d56e730 
							
						 
					 
					
						
						
							
							Update docs  
						
						
						
					 
					
						2020-08-05 15:01:00 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							cdec46493f 
							
						 
					 
					
						
						
							
							Update docs  
						
						
						
					 
					
						2020-08-05 15:00:54 +02:00 
						 
				 
			
				
					
						
							
							
								Adriane Boyd 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							c62fd878a3 
							
						 
					 
					
						
						
							
							Allow Doc.char_span to snap to token boundaries ( #5849 )  
						
						... 
						
						
						
						* Allow Doc.char_span to snap to token boundaries
Add a `mode` option to allow `Doc.char_span` to snap to token
boundaries. The `mode` options:
* `strict`: character offsets must match token boundaries (default, same as
before)
* `inside`: all tokens completely within the character span
* `outside`: all tokens at least partially covered by the character span
Add a new helper function `token_by_char` that returns the token
corresponding to a character position in the text. Update
`token_by_start` and `token_by_end` to use `token_by_char` for more
efficient searching.
* Remove unused import
* Rename mode to alignment_mode
Rename `mode` to `alignment_mode` with the options
`strict`/`contract`/`expand`. Any unrecognized modes are silently
converted to `strict`. 
						
					 
					
						2020-08-04 13:36:32 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
							
							
						
						
						
							
						
						
							4c055f0aa7 
							
						 
					 
					
						
						
							
							Add init CLI and init config ( #5854 )  
						
						... 
						
						
						
						* Add init CLI and init config draft
* Improve config validation
* Auto-format
* Don't export anything in debug config
* Update docs 
						
					 
					
						2020-08-02 15:18:30 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							b40f44419b 
							
						 
					 
					
						
						
							
							Simplify pipe analysis  
						
						... 
						
						
						
						- remove unused code
- don't print by default
- integrate attrs info into analysis output 
						
					 
					
						2020-08-01 13:40:06 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							98c6a85c8b 
							
						 
					 
					
						
						
							
							Update docs [ci skip]  
						
						
						
					 
					
						2020-07-31 18:55:38 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							e9e8fa2466 
							
						 
					 
					
						
						
							
							Update docs and types  
						
						
						
					 
					
						2020-07-31 17:02:54 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							5a221f79c2 
							
						 
					 
					
						
						
							
							Revert "Remove keyword-only from Scorer API docs" [ci skip]  
						
						... 
						
						
						
						This reverts commit 7a6ac47dc1 
						
					 
					
						2020-07-31 14:00:21 +02:00 
						 
				 
			
				
					
						
							
							
								Adriane Boyd 
							
						 
					 
					
						
						
						
						
							
						
						
							9b509aa87f 
							
						 
					 
					
						
						
							
							Move Language.evaluate scorer config to new arg  
						
						... 
						
						
						
						Move `Language.evaluate` scorer config from `component_cfg` to separate
argument `scorer_cfg`. 
						
					 
					
						2020-07-31 11:05:16 +02:00 
						 
				 
			
				
					
						
							
							
								Adriane Boyd 
							
						 
					 
					
						
						
						
						
							
						
						
							9d79916792 
							
						 
					 
					
						
						
							
							Merge branch 'develop' into feature/scorer-adjustments  
						
						
						
					 
					
						2020-07-31 10:48:14 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							9c80cb673d 
							
						 
					 
					
						
						
							
							Update docs [ci skip]  
						
						
						
					 
					
						2020-07-29 19:41:34 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							9f69afdd1e 
							
						 
					 
					
						
						
							
							Update docs [ci skip]  
						
						
						
					 
					
						2020-07-29 19:09:44 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							6a5c853edb 
							
						 
					 
					
						
						
							
							Fix docs [ci skip]  
						
						
						
					 
					
						2020-07-29 18:45:12 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							158d8c1e48 
							
						 
					 
					
						
						
							
							Update docs [ci skip]  
						
						
						
					 
					
						2020-07-29 18:44:10 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							b0f57a0cac 
							
						 
					 
					
						
						
							
							Update docs and consistency  
						
						
						
					 
					
						2020-07-29 15:14:07 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							e0ffe36e79 
							
						 
					 
					
						
						
							
							Update docstrings, docs and types  
						
						
						
					 
					
						2020-07-29 11:36:42 +02:00 
						 
				 
			
				
					
						
							
							
								Adriane Boyd 
							
						 
					 
					
						
						
						
						
							
						
						
							7a6ac47dc1 
							
						 
					 
					
						
						
							
							Remove keyword-only from Scorer API docs  
						
						
						
					 
					
						2020-07-29 10:40:30 +02:00 
						 
				 
			
				
					
						
							
							
								Ines Montani 
							
						 
					 
					
						
						
						
						
							
						
						
							ac24adec73 
							
						 
					 
					
						
						
							
							Small adjustments to Scorer and docs  
						
						
						
					 
					
						2020-07-28 21:39:42 +02:00