Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							e10d4ca964
							
						
					 | 
					
						
						
							
							Remove semi-redundant URLs and punctuation for faster testing
						
						
						
						
						
					 | 
					
						2017-01-10 15:54:25 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							3a3cb2c90c
							
						
					 | 
					
						
						
							
							Add unicode declaration
						
						
						
						
						
					 | 
					
						2017-01-10 15:53:15 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							271997a146
							
						
					 | 
					
						
						
							
							Merge branch 'master' of ssh://github.com/explosion/spaCy
						
						
						
						
						
					 | 
					
						2017-01-09 23:48:55 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							0f9b8a00a5
							
						
					 | 
					
						
						
							
							Unbreak data download
						
						
						
						
						
					 | 
					
						2017-01-09 23:40:26 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							05b3668916
							
						
					 | 
					
						
						
							
							Remove bold formatting as it occasionally causes markup errors
						
						
						
						
						
					 | 
					
						2017-01-09 20:26:09 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							8ae8b443f1
							
						
					 | 
					
						
						
							
							Add richcmp method to Token. Closes #631
						
						
						
						
						
					 | 
					
						2017-01-09 19:30:31 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							64f747cb65
							
						
					 | 
					
						
						
							
							Token comparison test
						
						
						
						
						
					 | 
					
						2017-01-09 19:12:00 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							18c3c2d05c
							
						
					 | 
					
						
						
							
							Add tests for token comparison, re Issue #631
						
						
						
						
						
					 | 
					
						2017-01-09 19:09:59 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							c9fdd9917c
							
						
					 | 
					
						
						
							
							Require older thinc
						
						
						
						
						
					 | 
					
						2017-01-09 10:12:41 -06:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							97a1286129
							
						
					 | 
					
						
						
							
							Revert changes to tagger and parser for thinc 6
						
						
						
						
						
					 | 
					
						2017-01-09 10:08:34 -06:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							95a52005df
							
						
					 | 
					
						
						
							
							Revert "Fix Issue #683: Add 'SP' to tag_map, if it's not there already, within the Morphology class."
						
						
						
						
						
						
						
						This reverts commit 40e71586d6. 
						
					 | 
					
						2017-01-09 09:55:55 -06:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							4ff92184f1
							
						
					 | 
					
						
						
							
							Improve train_ud script
						
						
						
						
						
					 | 
					
						2017-01-09 09:53:46 -06:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							363f09e68c
							
						
					 | 
					
						
						
							
							Merge pull request #726 from magnusburton/master
						
						
						
						
						
						
						
						Added Swedish abbreviations as token exceptions 
						
					 | 
					
						2017-01-09 14:58:15 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							7108ad9d80
							
						
					 | 
					
						
						
							
							Require thinc 6.1
						
						
						
						
						
					 | 
					
						2017-01-09 14:37:00 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							42cd598f57
							
						
					 | 
					
						
						
							
							Use correct fixtures in URL tokenizer
						
						
						
						
						
					 | 
					
						2017-01-09 14:10:40 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							d9a77ddf14
							
						
					 | 
					
						
						
							
							Return None for data path if it doesn't exist
						
						
						
						
						
					 | 
					
						2017-01-09 14:10:05 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							e4862d1dab
							
						
					 | 
					
						
						
							
							Merge branch 'develop'
						
						
						
						
						
					 | 
					
						2017-01-09 13:36:01 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							aa876884f0
							
						
					 | 
					
						
						
							
							Revert "Revert "Merge remote-tracking branch 'origin/master'""
						
						
						
						
						
						
						
						This reverts commit fb9d3bb022. 
						
					 | 
					
						2017-01-09 13:28:13 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							3eb6a929f3
							
						
					 | 
					
						
						
							
							Merge branch 'master' of ssh://github.com/explosion/spaCy
						
						
						
						
						
					 | 
					
						2017-01-09 13:21:56 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							c1ef07788c
							
						
					 | 
					
						
						
							
							Update train_ud.py
						
						
						
						
						
						
						
						Create deps folder if it doesn't exist. 
						
					 | 
					
						2017-01-09 10:55:44 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							57fab43a3a
							
						
					 | 
					
						
						
							
							Add info on language-specific issue labels
						
						
						
						
						
					 | 
					
						2017-01-09 00:42:25 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							d87ca84028
							
						
					 | 
					
						
						
							
							Remove old website example tests from setup.py
						
						
						
						
						
					 | 
					
						2017-01-08 22:42:54 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							d5c72c40eb
							
						
					 | 
					
						
						
							
							Remove old tests for old website example code
						
						
						
						
						
					 | 
					
						2017-01-08 22:28:53 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							eef94e3ee2
							
						
					 | 
					
						
						
							
							Split off period after two or more uppercase letters (fixes #483)
						
						
						
						
						
					 | 
					
						2017-01-08 22:28:25 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							a89a6000e5
							
						
					 | 
					
						
						
							
							Remove unused import
						
						
						
						
						
					 | 
					
						2017-01-08 22:17:37 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							5d28664fc5
							
						
					 | 
					
						
						
							
							Don't test Hungarian for numbers and hyphens for now
						
						
						
						
						
						
						
						Reinvestigate behaviour of case affixes given reorganised tokenizer
patterns. 
						
					 | 
					
						2017-01-08 20:45:40 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							53362b6b93
							
						
					 | 
					
						
						
							
							Reorganise Hungarian prefixes/suffixes/infixes
						
						
						
						
						
						
						
						Use global prefixes and suffixes for non-language-specific rules,
import list of alpha unicode characters and adjust regexes. 
						
					 | 
					
						2017-01-08 20:40:33 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							347c4a2d06
							
						
					 | 
					
						
						
							
							Reorganise and reformat global tokenizer prefixes, suffixes and infixes
						
						
						
						
						
					 | 
					
						2017-01-08 20:37:39 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							0dec90e9f7
							
						
					 | 
					
						
						
							
							Use global abbreviation data languages and remove duplicates
						
						
						
						
						
					 | 
					
						2017-01-08 20:36:00 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							7c3cb2a652
							
						
					 | 
					
						
						
							
							Add global abbreviations data
						
						
						
						
						
					 | 
					
						2017-01-08 20:34:03 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							de5aa92bc2
							
						
					 | 
					
						
						
							
							Handle deprecated tokenizer prefix data
						
						
						
						
						
					 | 
					
						2017-01-08 20:33:28 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							abb09782f9
							
						
					 | 
					
						
						
							
							Move sun.txt to original location and fix path to not break parser tests
						
						
						
						
						
					 | 
					
						2017-01-08 20:32:54 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							57919566b8
							
						
					 | 
					
						
						
							
							Add Jupyter notebooks repo to resources list
						
						
						
						
						
					 | 
					
						2017-01-05 20:50:08 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							cab39c59c5
							
						
					 | 
					
						
						
							
							Add missing contractions to English tokenizer exceptions
						
						
						
						
						
						
						
						Inspired by
https://github.com/kootenpv/contractions/blob/master/contractions/__init__.py 
						
					 | 
					
						2017-01-05 19:59:06 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							a23504fe07
							
						
					 | 
					
						
						
							
							Move abbreviations below other exceptions
						
						
						
						
						
					 | 
					
						2017-01-05 19:58:07 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							7d2cf934b9
							
						
					 | 
					
						
						
							
							Generate he/she/it correctly with 's instead of 've
						
						
						
						
						
					 | 
					
						2017-01-05 19:57:00 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							8328925e1f
							
						
					 | 
					
						
						
							
							Add newlines to long German text
						
						
						
						
						
					 | 
					
						2017-01-05 18:13:30 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							55b46d7cf6
							
						
					 | 
					
						
						
							
							Add tokenizer tests for German
						
						
						
						
						
					 | 
					
						2017-01-05 18:11:25 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							5bb4081f52
							
						
					 | 
					
						
						
							
							Remove redundant test_tokenizer.py for English
						
						
						
						
						
					 | 
					
						2017-01-05 18:11:11 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							8216ba599b
							
						
					 | 
					
						
						
							
							Add tests for longer and mixed English texts
						
						
						
						
						
					 | 
					
						2017-01-05 18:11:04 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							65f937d5c6
							
						
					 | 
					
						
						
							
							Move basic contraction tests to test_contractions.py
						
						
						
						
						
					 | 
					
						2017-01-05 18:09:53 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							bbe7cab3a1
							
						
					 | 
					
						
						
							
							Move non-English-specific tests back to general tokenizer tests
						
						
						
						
						
					 | 
					
						2017-01-05 18:09:29 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							038002d616
							
						
					 | 
					
						
						
							
							Reformat HU tokenizer tests and adapt to general style
						
						
						
						
						
						
						
						Improve readability of test cases and add conftest.py with fixture 
						
					 | 
					
						2017-01-05 18:06:44 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							bc911322b3
							
						
					 | 
					
						
						
							
							Move ") to emoticons (see Tweebo challenge test)
						
						
						
						
						
					 | 
					
						2017-01-05 18:05:38 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							637f785036
							
						
					 | 
					
						
						
							
							Add general sanity tests for all tokenizers
						
						
						
						
						
					 | 
					
						2017-01-05 16:25:38 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							c5f2dc15de
							
						
					 | 
					
						
						
							
							Move English tokenizer tests to directory /en
						
						
						
						
						
					 | 
					
						2017-01-05 16:25:04 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							8b45363b4d
							
						
					 | 
					
						
						
							
							Modernize and merge general tokenizer tests
						
						
						
						
						
					 | 
					
						2017-01-05 13:17:05 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							02cfda48c9
							
						
					 | 
					
						
						
							
							Modernize and merge tokenizer tests for string loading
						
						
						
						
						
					 | 
					
						2017-01-05 13:16:55 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							a11f684822
							
						
					 | 
					
						
						
							
							Modernize and merge tokenizer tests for whitespace
						
						
						
						
						
					 | 
					
						2017-01-05 13:16:33 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Ines Montani
							
						 
					 | 
					
						
						
						
						
							
						
						
							8b284fc6f1
							
						
					 | 
					
						
						
							
							Modernize and merge tokenizer tests for text from file
						
						
						
						
						
					 | 
					
						2017-01-05 13:15:52 +01:00 | 
					
					
						
						
							
							
							
						
					 |