ines
							
						 
					 | 
					
						
						
						
						
							
						
						
							66c1f194f9
							
						
					 | 
					
						
						
							
							Use consistent unicode declarations
						
						
						
						
						
					 | 
					
						2017-03-12 13:07:28 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							ea2592879f
							
						
					 | 
					
						
						
							
							Merge branch 'master' of https://github.com/explosion/spaCy
						
						
						
						
						
					 | 
					
						2017-03-11 11:13:37 -06:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								ines
							
						 
					 | 
					
						
						
						
						
							
						
						
							b04893a059
							
						
					 | 
					
						
						
							
							Make regex locale-independent for Python 2
						
						
						
						
						
					 | 
					
						2017-03-10 14:21:57 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							ea53647362
							
						
					 | 
					
						
						
							
							Merge branch 'develop'
						
						
						
						
						
					 | 
					
						2017-03-10 02:49:39 -06:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Dan Rapp
							
						 
					 | 
					
						
						
						
						
							
						
						
							3b1df3808d
							
						
					 | 
					
						
						
							
							Issue #840 - URL pattenr too broad
						
						
						
						
						
					 | 
					
						2017-03-09 11:39:39 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Roman Inflianskas
							
						 
					 | 
					
						
						
						
						
							
						
						
							66e1109b53
							
						
					 | 
					
						
						
							
							Add support for Universal Dependencies v2.0
						
						
						
						
						
					 | 
					
						2017-03-03 13:17:34 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							012f4820cb
							
						
					 | 
					
						
						
							
							Keep infixes of punctuation + hyphens as one token (see #801)
						
						
						
						
						
					 | 
					
						2017-02-02 16:22:40 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							1219a5f513
							
						
					 | 
					
						
						
							
							Add = to tokenizer prefixes
						
						
						
						
						
					 | 
					
						2017-02-02 16:21:11 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							ff04748eb6
							
						
					 | 
					
						
						
							
							Add missing emoticon
						
						
						
						
						
					 | 
					
						2017-02-02 16:21:00 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							116c675c3c
							
						
					 | 
					
						
						
							
							Merge pull request #742 from oroszgy/hu_tokenizer_fix
						
						
						
						
						
						
						
						Improved Hungarian tokenizer 
						
					 | 
					
						2017-01-14 23:52:44 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Gyorgy Orosz
							
						 
					 | 
					
						
						
						
						
							
						
						
							63037e79af
							
						
					 | 
					
						
						
							
							Fixed hyphen handling in the Hungarian tokenizer.
						
						
						
						
						
					 | 
					
						2017-01-14 16:30:11 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Gyorgy Orosz
							
						 
					 | 
					
						
						
						
						
							
						
						
							be7a7aeb1a
							
						
					 | 
					
						
						
							
							Reversed accidental changes.
						
						
						
						
						
					 | 
					
						2017-01-14 15:59:36 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Gyorgy Orosz
							
						 
					 | 
					
						
						
						
						
							
						
						
							1be5da1ac6
							
						
					 | 
					
						
						
							
							Fixed Hungarian tokenizer for numbers
						
						
						
						
						
					 | 
					
						2017-01-14 15:51:59 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							0894b8c0ef
							
						
					 | 
					
						
						
							
							Don't split tokens with digits and "/" infixes (resolves #740)
						
						
						
						
						
					 | 
					
						2017-01-12 22:58:26 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							fba67fa342
							
						
					 | 
					
						
						
							
							Fix Issue #736: Times were being tokenized with incorrect string values.
						
						
						
						
						
					 | 
					
						2017-01-12 11:21:01 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							aa876884f0
							
						
					 | 
					
						
						
							
							Revert "Revert "Merge remote-tracking branch 'origin/master'""
						
						
						
						
						
						
						
						This reverts commit fb9d3bb022. 
						
					 | 
					
						2017-01-09 13:28:13 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							eef94e3ee2
							
						
					 | 
					
						
						
							
							Split off period after two or more uppercase letters (fixes #483)
						
						
						
						
						
					 | 
					
						2017-01-08 22:28:25 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							347c4a2d06
							
						
					 | 
					
						
						
							
							Reorganise and reformat global tokenizer prefixes, suffixes and infixes
						
						
						
						
						
					 | 
					
						2017-01-08 20:37:39 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							7c3cb2a652
							
						
					 | 
					
						
						
							
							Add global abbreviations data
						
						
						
						
						
					 | 
					
						2017-01-08 20:34:03 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							bc911322b3
							
						
					 | 
					
						
						
							
							Move ") to emoticons (see Tweebo challenge test)
						
						
						
						
						
					 | 
					
						2017-01-05 18:05:38 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							fb9d3bb022
							
						
					 | 
					
						
						
							
							Revert "Merge remote-tracking branch 'origin/master'"
						
						
						
						
						
						
						
						This reverts commit d3b181cdf1, reversing
changes made to b19cfcc144. 
						
					 | 
					
						2017-01-03 18:21:36 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							9936a1b9b5
							
						
					 | 
					
						
						
							
							Merge branch 'tokenization_w_exception_patterns' of https://github.com/oroszgy/spaCy.hu into oroszgy-tokenization_w_exception_patterns
						
						
						
						
						
					 | 
					
						2016-12-30 14:53:40 -06:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Petter Hohle
							
						 
					 | 
					
						
						
						
						
							
						
						
							f112e7754e
							
						
					 | 
					
						
						
							
							Add PART to tag map
						
						
						
						
						
						
						
						16 of the 17 PoS tags in the UD tag set is added; PART is missing. 
						
					 | 
					
						2016-12-28 18:39:01 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Gyorgy Orosz
							
						 
					 | 
					
						
						
						
						
							
						
						
							3a9be4d485
							
						
					 | 
					
						
						
							
							Updated token exception handling mechanism to allow the usage of arbitrary functions as token exception matchers.
						
						
						
						
						
					 | 
					
						2016-12-23 23:49:34 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Gyorgy Orosz
							
						 
					 | 
					
						
						
						
						
							
						
						
							1748549aeb
							
						
					 | 
					
						
						
							
							Added exception pattern mechanism to the tokenizer.
						
						
						
						
						
					 | 
					
						2016-12-21 23:16:19 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							920fa0fed2
							
						
					 | 
					
						
						
							
							Add DET_LEMMA constant
						
						
						
						
						
					 | 
					
						2016-12-21 18:05:41 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							4e95737c6c
							
						
					 | 
					
						
						
							
							Add base tag map
						
						
						
						
						
					 | 
					
						2016-12-18 16:54:28 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							2b2ea8ca11
							
						
					 | 
					
						
						
							
							Reorganise language data
						
						
						
						
						
					 | 
					
						2016-12-18 16:54:19 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							bc40dad7d9
							
						
					 | 
					
						
						
							
							Add entity rules
						
						
						
						
						
					 | 
					
						2016-12-18 15:36:53 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							eaa3b1319d
							
						
					 | 
					
						
						
							
							Fix formatting
						
						
						
						
						
					 | 
					
						2016-12-18 15:36:53 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							62655fd36f
							
						
					 | 
					
						
						
							
							Add ENT_ID constant
						
						
						
						
						
					 | 
					
						2016-12-18 15:36:53 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							f324311249
							
						
					 | 
					
						
						
							
							Add global language data utils
						
						
						
						
						
					 | 
					
						2016-12-17 12:27:41 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							e47ee94761
							
						
					 | 
					
						
						
							
							Split punctuation into its own file
						
						
						
						
						
					 | 
					
						2016-12-08 19:46:43 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							e8ae588be9
							
						
					 | 
					
						
						
							
							Add emoticons
						
						
						
						
						
					 | 
					
						2016-12-08 19:45:18 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							5908c0ed9f
							
						
					 | 
					
						
						
							
							Fix formatting
						
						
						
						
						
					 | 
					
						2016-12-08 19:45:11 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							0d07d7fc80
							
						
					 | 
					
						
						
							
							Apply emoticon exceptions to tokenizer
						
						
						
						
						
					 | 
					
						2016-12-07 21:11:59 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							9413bcd9ee
							
						
					 | 
					
						
						
							
							Declare encoding and unicode literals
						
						
						
						
						
					 | 
					
						2016-12-07 21:10:34 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							a280ff2657
							
						
					 | 
					
						
						
							
							Fix __all__
						
						
						
						
						
					 | 
					
						2016-12-07 21:10:12 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							ba8721953c
							
						
					 | 
					
						
						
							
							Add missing emoticons
						
						
						
						
						
					 | 
					
						2016-12-07 21:09:44 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							79dce0aabe
							
						
					 | 
					
						
						
							
							Add emoticons
						
						
						
						
						
					 | 
					
						2016-12-07 20:33:28 +01:00 | 
					
					
						
						
							
							
							
						
					 |