Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							4e48862fa8
							
						
					 | 
					
						
						
							
							Remove print statement
						
						
						
						
						
					 | 
					
						2017-01-12 11:25:39 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							fba67fa342
							
						
					 | 
					
						
						
							
							Fix Issue #736: Times were being tokenized with incorrect string values.
						
						
						
						
						
					 | 
					
						2017-01-12 11:21:01 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							0dec90e9f7
							
						
					 | 
					
						
						
							
							Use global abbreviation data languages and remove duplicates
						
						
						
						
						
					 | 
					
						2017-01-08 20:36:00 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							7a98ee5e5a
							
						
					 | 
					
						
						
							
							Merge language data change
						
						
						
						
						
					 | 
					
						2016-12-18 17:03:52 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							2b2ea8ca11
							
						
					 | 
					
						
						
							
							Reorganise language data
						
						
						
						
						
					 | 
					
						2016-12-18 16:54:19 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							1bff59a8db
							
						
					 | 
					
						
						
							
							Update English language data
						
						
						
						
						
					 | 
					
						2016-12-18 15:36:53 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							704c7442e0
							
						
					 | 
					
						
						
							
							Break language data components into their own files
						
						
						
						
						
					 | 
					
						2016-12-18 15:36:53 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							28326649f3
							
						
					 | 
					
						
						
							
							Fix typo
						
						
						
						
						
					 | 
					
						2016-12-18 13:30:03 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							28d63ec58e
							
						
					 | 
					
						
						
							
							Restore missing '' character in tokenizer exceptions.
						
						
						
						
						
					 | 
					
						2016-12-18 05:34:51 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							a9421652c9
							
						
					 | 
					
						
						
							
							Remove duplicates in tag map
						
						
						
						
						
					 | 
					
						2016-12-17 22:44:31 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							577adad945
							
						
					 | 
					
						
						
							
							Fix formatting
						
						
						
						
						
					 | 
					
						2016-12-17 14:00:52 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							bb94e784dc
							
						
					 | 
					
						
						
							
							Fix typo
						
						
						
						
						
					 | 
					
						2016-12-17 13:59:30 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							a22322187f
							
						
					 | 
					
						
						
							
							Add missing lemmas to tokenizer exceptions (fixes #674)
						
						
						
						
						
					 | 
					
						2016-12-17 12:42:41 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							08162dce67
							
						
					 | 
					
						
						
							
							Move shared functions and constants to global language data
						
						
						
						
						
					 | 
					
						2016-12-17 12:32:48 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							d8d50a0334
							
						
					 | 
					
						
						
							
							Add tokenizer exception for "gonna" (fixes #691)
						
						
						
						
						
					 | 
					
						2016-12-17 11:59:28 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							c69b77d8aa
							
						
					 | 
					
						
						
							
							Revert "Add exception for "gonna""
						
						
						
						
						
						
						
						This reverts commit 280c03f67b. 
						
					 | 
					
						2016-12-17 11:56:44 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							280c03f67b
							
						
					 | 
					
						
						
							
							Add exception for "gonna"
						
						
						
						
						
					 | 
					
						2016-12-17 11:54:59 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							0c39654786
							
						
					 | 
					
						
						
							
							Remove unused import
						
						
						
						
						
					 | 
					
						2016-12-08 19:46:53 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							e47ee94761
							
						
					 | 
					
						
						
							
							Split punctuation into its own file
						
						
						
						
						
					 | 
					
						2016-12-08 19:46:43 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							311b30ab35
							
						
					 | 
					
						
						
							
							Reorganize exceptions for English and German
						
						
						
						
						
					 | 
					
						2016-12-08 13:58:32 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							877f09218b
							
						
					 | 
					
						
						
							
							Add more custom rules for abbreviations
						
						
						
						
						
					 | 
					
						2016-12-08 12:47:01 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							ec44bee321
							
						
					 | 
					
						
						
							
							Fix capitalization on morphological features
						
						
						
						
						
					 | 
					
						2016-12-08 12:00:54 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							ce979553df
							
						
					 | 
					
						
						
							
							Resolve conflict
						
						
						
						
						
					 | 
					
						2016-12-07 21:16:52 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							0d07d7fc80
							
						
					 | 
					
						
						
							
							Apply emoticon exceptions to tokenizer
						
						
						
						
						
					 | 
					
						2016-12-07 21:11:59 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							71f0f34cb3
							
						
					 | 
					
						
						
							
							Fix formatting
						
						
						
						
						
					 | 
					
						2016-12-07 21:11:29 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							1285c4ba93
							
						
					 | 
					
						
						
							
							Update English language data
						
						
						
						
						
					 | 
					
						2016-12-07 20:33:28 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							a662a95294
							
						
					 | 
					
						
						
							
							Add line breaks
						
						
						
						
						
					 | 
					
						2016-12-07 20:33:28 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							e0712d1b32
							
						
					 | 
					
						
						
							
							Reformat language data
						
						
						
						
						
					 | 
					
						2016-12-07 20:33:28 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							4dcfafde02
							
						
					 | 
					
						
						
							
							Add line breaks
						
						
						
						
						
					 | 
					
						2016-11-24 14:57:37 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							de747e39e7
							
						
					 | 
					
						
						
							
							Reformat language data
						
						
						
						
						
					 | 
					
						2016-11-24 13:51:32 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							dad2c6cae9
							
						
					 | 
					
						
						
							
							Strip trailing whitespace
						
						
						
						
						
					 | 
					
						2016-11-20 16:45:51 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							f0917b6808
							
						
					 | 
					
						
						
							
							Fix Issue #376: and/or was tagged as a noun.
						
						
						
						
						
					 | 
					
						2016-11-04 15:21:28 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							737816e86e
							
						
					 | 
					
						
						
							
							Fix #368: Tokenizer handled pattern 'unicode close quote, period' incorrectly.
						
						
						
						
						
					 | 
					
						2016-11-04 15:16:20 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							41a90a7fbb
							
						
					 | 
					
						
						
							
							Add tokenizer exception for 'Ph.D.', to fix 592.
						
						
						
						
						
					 | 
					
						2016-11-03 00:03:34 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							d7e9acdcdf
							
						
					 | 
					
						
						
							
							Add English language data, so that the tokenizer doesn't require the data download
						
						
						
						
						
					 | 
					
						2016-09-25 14:49:00 +02:00 | 
					
					
						
						
							
							
							
						
					 |