..  
		
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										af
									
								 
							
						
					 
				 
				
					
						
							
							Simplify language data and revert detailed configs 
						
					 
				 
				2020-07-24 14:50:26 +02:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										am
									
								 
							
						
					 
				 
				
					
						
							
							Tidy up and auto-format 
						
					 
				 
				2021-02-13 12:55:56 +11:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										ar
									
								 
							
						
					 
				 
				
					
						
							
							Simplify language data and revert detailed configs 
						
					 
				 
				2020-07-24 14:50:26 +02:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										az
									
								 
							
						
					 
				 
				
					
						
							
							Fix Azerbaijani init, extend lang init tests ( #8656 ) 
						
					 
				 
				2021-07-09 15:36:35 +02:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										bg
									
								 
							
						
					 
				 
				
					
						
							
							Tidy up code 
						
					 
				 
				2021-06-28 12:08:15 +02:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										bn
									
								 
							
						
					 
				 
				
					
						
							
							Implement overwrite param for all custom lemmatizers ( #6794 ) 
						
					 
				 
				2021-01-26 14:53:43 +11:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										ca
									
								 
							
						
					 
				 
				
					
						
							
							Update Catalan language data ( #8308 ) 
						
					 
				 
				2021-06-11 10:21:22 +02:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										cs
									
								 
							
						
					 
				 
				
					
						
							
							Tidy up and auto-format 
						
					 
				 
				2021-01-05 13:41:53 +11:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										da
									
								 
							
						
					 
				 
				
					
						
							
							Merge remote-tracking branch 'upstream/master' into chore/update-develop-from-master-rc3 
						
					 
				 
				2021-01-14 11:49:58 +01:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										de
									
								 
							
						
					 
				 
				
					
						
							
							Merge branch 'develop' into master-tmp 
						
					 
				 
				2020-10-04 14:52:20 +02:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										el
									
								 
							
						
					 
				 
				
					
						
							
							Fix non-deterministic deduplication in Greek lemmatizer ( #8421 ) 
						
					 
				 
				2021-06-17 09:11:01 +02:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										en
									
								 
							
						
					 
				 
				
					
						
							
							Fix/fix en ordinals ( #8028 ) 
						
					 
				 
				2021-05-07 10:26:42 +02:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										es
									
								 
							
						
					 
				 
				
					
						
							
							Tidy up and auto-format 
						
					 
				 
				2021-01-30 12:52:33 +11:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										et
									
								 
							
						
					 
				 
				
					
						
							
							Simplify language data and revert detailed configs 
						
					 
				 
				2020-07-24 14:50:26 +02:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										eu
									
								 
							
						
					 
				 
				
					
						
							
							Simplify language data and revert detailed configs 
						
					 
				 
				2020-07-24 14:50:26 +02:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										fa
									
								 
							
						
					 
				 
				
					
						
							
							Implement overwrite param for all custom lemmatizers ( #6794 ) 
						
					 
				 
				2021-01-26 14:53:43 +11:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										fi
									
								 
							
						
					 
				 
				
					
						
							
							Tidy up code 
						
					 
				 
				2021-06-28 12:08:15 +02:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										fr
									
								 
							
						
					 
				 
				
					
						
							
							Improvements to French stopwords list ( #7941 ) 
						
					 
				 
				2021-06-02 11:50:49 +02:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										ga
									
								 
							
						
					 
				 
				
					
						
							
							Simplify language data and revert detailed configs 
						
					 
				 
				2020-07-24 14:50:26 +02:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										gu
									
								 
							
						
					 
				 
				
					
						
							
							Simplify language data and revert detailed configs 
						
					 
				 
				2020-07-24 14:50:26 +02:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										he
									
								 
							
						
					 
				 
				
					
						
							
							raise NotImplementedError when noun_chunks iterator is not implemented ( #6711 ) 
						
					 
				 
				2021-01-17 19:56:05 +08:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										hi
									
								 
							
						
					 
				 
				
					
						
							
							Auto-format [ci skip] 
						
					 
				 
				2020-10-15 10:08:53 +02:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										hr
									
								 
							
						
					 
				 
				
					
						
							
							Remove tag map 
						
					 
				 
				2020-12-09 11:13:49 +11:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										hu
									
								 
							
						
					 
				 
				
					
						
							
							Fix Hungarian % tokenization ( #6013 ) 
						
					 
				 
				2020-09-02 13:06:16 +02:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										hy
									
								 
							
						
					 
				 
				
					
						
							
							Simplify language data and revert detailed configs 
						
					 
				 
				2020-07-24 14:50:26 +02:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										id
									
								 
							
						
					 
				 
				
					
						
							
							Merge branch 'develop' into master-tmp 
						
					 
				 
				2020-10-04 14:52:20 +02:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										is
									
								 
							
						
					 
				 
				
					
						
							
							Simplify language data and revert detailed configs 
						
					 
				 
				2020-07-24 14:50:26 +02:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										it
									
								 
							
						
					 
				 
				
					
						
							
							Tidy up code 
						
					 
				 
				2021-06-28 12:08:15 +02:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										ja
									
								 
							
						
					 
				 
				
					
						
							
							Add lexeme norm defaults 
						
					 
				 
				2020-09-30 10:20:14 +02:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										kn
									
								 
							
						
					 
				 
				
					
						
							
							Simplify language data and revert detailed configs 
						
					 
				 
				2020-07-24 14:50:26 +02:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										ko
									
								 
							
						
					 
				 
				
					
						
							
							Add lexeme norm defaults 
						
					 
				 
				2020-09-30 10:20:14 +02:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										ky
									
								 
							
						
					 
				 
				
					
						
							
							Tidy up and auto-format 
						
					 
				 
				2021-01-30 12:52:33 +11:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										lb
									
								 
							
						
					 
				 
				
					
						
							
							Remove default initialize lookups 
						
					 
				 
				2020-10-01 21:54:33 +02:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										lij
									
								 
							
						
					 
				 
				
					
						
							
							Simplify language data and revert detailed configs 
						
					 
				 
				2020-07-24 14:50:26 +02:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										lt
									
								 
							
						
					 
				 
				
					
						
							
							Fix escape sequence 
						
					 
				 
				2021-01-30 12:39:58 +11:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										lv
									
								 
							
						
					 
				 
				
					
						
							
							Simplify language data and revert detailed configs 
						
					 
				 
				2020-07-24 14:50:26 +02:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										mk
									
								 
							
						
					 
				 
				
					
						
							
							Tidy up and auto-format 
						
					 
				 
				2021-01-30 12:52:33 +11:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										ml
									
								 
							
						
					 
				 
				
					
						
							
							Add missing lex_attr_getters ( resolves   #5806  ) 
						
					 
				 
				2020-07-25 12:55:18 +02:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										mr
									
								 
							
						
					 
				 
				
					
						
							
							Simplify language data and revert detailed configs 
						
					 
				 
				2020-07-24 14:50:26 +02:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										nb
									
								 
							
						
					 
				 
				
					
						
							
							Add / to nb infixes ( #7991 ) 
						
					 
				 
				2021-05-04 11:00:10 +02:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										ne
									
								 
							
						
					 
				 
				
					
						
							
							Remove unicode declarations and update language data 
						
					 
				 
				2020-09-04 13:19:16 +02:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										nl
									
								 
							
						
					 
				 
				
					
						
							
							Adding noun_chunks to the DUTCH language model (nl) ( #8529 ) 
						
					 
				 
				2021-07-14 14:01:02 +02:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										pl
									
								 
							
						
					 
				 
				
					
						
							
							Implement overwrite param for all custom lemmatizers ( #6794 ) 
						
					 
				 
				2021-01-26 14:53:43 +11:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										pt
									
								 
							
						
					 
				 
				
					
						
							
							Tidy up and auto-format 
						
					 
				 
				2021-01-15 11:57:36 +11:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										ro
									
								 
							
						
					 
				 
				
					
						
							
							Merge remote-tracking branch 'upstream/master' into chore/update-develop-from-master-rc3 
						
					 
				 
				2021-01-14 11:49:58 +01:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										ru
									
								 
							
						
					 
				 
				
					
						
							
							Fix ru/uk lemmatizer mp with spawn ( #8657 ) 
						
					 
				 
				2021-07-09 15:36:56 +02:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										sa
									
								 
							
						
					 
				 
				
					
						
							
							Tidy up and auto-format 
						
					 
				 
				2020-09-29 21:39:28 +02:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										si
									
								 
							
						
					 
				 
				
					
						
							
							Simplify language data and revert detailed configs 
						
					 
				 
				2020-07-24 14:50:26 +02:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										sk
									
								 
							
						
					 
				 
				
					
						
							
							Simplify language data and revert detailed configs 
						
					 
				 
				2020-07-24 14:50:26 +02:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										sl
									
								 
							
						
					 
				 
				
					
						
							
							Simplify language data and revert detailed configs 
						
					 
				 
				2020-07-24 14:50:26 +02:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										sq
									
								 
							
						
					 
				 
				
					
						
							
							Simplify language data and revert detailed configs 
						
					 
				 
				2020-07-24 14:50:26 +02:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										sr
									
								 
							
						
					 
				 
				
					
						
							
							Remove default initialize lookups 
						
					 
				 
				2020-10-01 21:54:33 +02:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										sv
									
								 
							
						
					 
				 
				
					
						
							
							Implement overwrite param for all custom lemmatizers ( #6794 ) 
						
					 
				 
				2021-01-26 14:53:43 +11:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										ta
									
								 
							
						
					 
				 
				
					
						
							
							Merge branch 'develop' into master-tmp 
						
					 
				 
				2020-10-15 09:06:03 +02:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										te
									
								 
							
						
					 
				 
				
					
						
							
							Simplify language data and revert detailed configs 
						
					 
				 
				2020-07-24 14:50:26 +02:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										th
									
								 
							
						
					 
				 
				
					
						
							
							Add Thai tag map (LST20 Corpus) ( #6163 ) 
						
					 
				 
				2020-10-07 11:12:01 +02:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										ti
									
								 
							
						
					 
				 
				
					
						
							
							Tidy up and auto-format 
						
					 
				 
				2021-01-15 11:57:36 +11:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										tl
									
								 
							
						
					 
				 
				
					
						
							
							Simplify language data and revert detailed configs 
						
					 
				 
				2020-07-24 14:50:26 +02:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										tn
									
								 
							
						
					 
				 
				
					
						
							
							Tidy up and auto-format 
						
					 
				 
				2021-02-13 12:55:56 +11:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										tr
									
								 
							
						
					 
				 
				
					
						
							
							Tidy up and auto-format 
						
					 
				 
				2021-01-05 13:41:53 +11:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										tt
									
								 
							
						
					 
				 
				
					
						
							
							Simplify language data and revert detailed configs 
						
					 
				 
				2020-07-24 14:50:26 +02:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										uk
									
								 
							
						
					 
				 
				
					
						
							
							Fix ru/uk lemmatizer mp with spawn ( #8657 ) 
						
					 
				 
				2021-07-09 15:36:56 +02:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										ur
									
								 
							
						
					 
				 
				
					
						
							
							Simplify language data and revert detailed configs 
						
					 
				 
				2020-07-24 14:50:26 +02:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										vi
									
								 
							
						
					 
				 
				
					
						
							
							Update Vietnamese tokenizer ( #8099 ) 
						
					 
				 
				2021-05-17 18:16:20 +10:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										xx
									
								 
							
						
					 
				 
				
					
						
							
							Simplify language data and revert detailed configs 
						
					 
				 
				2020-07-24 14:50:26 +02:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										yo
									
								 
							
						
					 
				 
				
					
						
							
							Simplify language data and revert detailed configs 
						
					 
				 
				2020-07-24 14:50:26 +02:00  
		
			
			
			
			
				
					
						
							
								
								
									
									
									
										zh
									
								 
							
						
					 
				 
				
					
						
							
							Setup / install / quickstart updates 
						
					 
				 
				2020-10-23 11:27:54 +02:00  
		
			
			
			
			
				
					
						
							
								__init__.py 
							
						
					 
				 
				
					
						
							
							Remove imports in /lang/__init__.py 
						
					 
				 
				2017-05-08 23:58:07 +02:00  
		
			
			
			
			
				
					
						
							
								char_classes.py 
							
						
					 
				 
				
					
						
							
							Add all symbols in Unicode Currency Symbols block ( #8212 ) 
						
					 
				 
				2021-05-31 18:03:40 +10:00  
		
			
			
			
			
				
					
						
							
								lex_attrs.py 
							
						
					 
				 
				
					
						
							
							Merge branch 'develop' into master-tmp 
						
					 
				 
				2020-09-04 13:15:36 +02:00  
		
			
			
			
			
				
					
						
							
								norm_exceptions.py 
							
						
					 
				 
				
					
						
							
							Tidy up and auto-format 
						
					 
				 
				2020-02-18 15:38:18 +01:00  
		
			
			
			
			
				
					
						
							
								punctuation.py 
							
						
					 
				 
				
					
						
							
							Simplify language data and revert detailed configs 
						
					 
				 
				2020-07-24 14:50:26 +02:00  
		
			
			
			
			
				
					
						
							
								tokenizer_exceptions.py 
							
						
					 
				 
				
					
						
							
							Tidy up with flake8: imports, comparisons, etc. 
						
					 
				 
				2021-06-28 12:08:15 +02:00