Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f8004c5f65 
							
						 
					 
					
						
						
							
							* Begin upgrading to improved thinc API  
						
						
						
					 
					
						2015-11-05 03:53:03 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							adc7bbd6cf 
							
						 
					 
					
						
						
							
							* Fix name of like_num in default_lex_attrs  
						
						
						
					 
					
						2015-11-04 22:02:47 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e96faf29e7 
							
						 
					 
					
						
						
							
							* Rename like_number to like_num, to fix inconsistency re Issue  #166  
						
						
						
					 
					
						2015-11-04 22:01:44 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							65934b7cd4 
							
						 
					 
					
						
						
							
							* Enforce import of ujson in strings.pyx, because otherwise it's too slow  
						
						
						
					 
					
						2015-11-04 00:32:02 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							1ce5d5602d 
							
						 
					 
					
						
						
							
							* Rename Doc.data to Doc.c  
						
						
						
					 
					
						2015-11-04 00:17:13 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							68f479e821 
							
						 
					 
					
						
						
							
							* Rename Doc.data to Doc.c  
						
						
						
					 
					
						2015-11-04 00:15:14 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3ddea19b2b 
							
						 
					 
					
						
						
							
							* Rename spans.pyx to span.pyx  
						
						
						
					 
					
						2015-11-04 00:14:40 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9482d616bc 
							
						 
					 
					
						
						
							
							* Rename spans.pyx to span.pyx  
						
						
						
					 
					
						2015-11-03 23:51:05 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							116da5990a 
							
						 
					 
					
						
						
							
							* Clean up setting of tag in doc.from_bytes  
						
						
						
					 
					
						2015-11-03 23:48:57 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9ec7b9c454 
							
						 
					 
					
						
						
							
							* Clean up unused Constituent struct.  
						
						
						
					 
					
						2015-11-03 23:48:21 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							1e99fcd413 
							
						 
					 
					
						
						
							
							* Rename .repvec to .vector in C API  
						
						
						
					 
					
						2015-11-03 23:47:59 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ee3f9ba581 
							
						 
					 
					
						
						
							
							* Fix test of serializer  
						
						
						
					 
					
						2015-11-03 19:45:16 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							d06ba26371 
							
						 
					 
					
						
						
							
							* Fix test of serializer  
						
						
						
					 
					
						2015-11-03 19:43:27 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4083059650 
							
						 
					 
					
						
						
							
							Merge branch 'master' of  https://github.com/honnibal/spaCy  
						
						
						
					 
					
						2015-11-03 09:07:19 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9e37437ba8 
							
						 
					 
					
						
						
							
							* Fix assign_tag in doc.merge  
						
						
						
					 
					
						2015-11-03 19:07:02 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							dde9e1357c 
							
						 
					 
					
						
						
							
							* Add todo to morphology.lemmatize  
						
						
						
					 
					
						2015-11-03 18:54:35 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ffedff9e6c 
							
						 
					 
					
						
						
							
							* Remove the archive after download, to save disk space  
						
						
						
					 
					
						2015-11-03 18:54:05 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							85372468e3 
							
						 
					 
					
						
						
							
							* Fix serialize test  
						
						
						
					 
					
						2015-11-03 08:51:33 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							833eb35c57 
							
						 
					 
					
						
						
							
							* Fix tag assignment in doc.from_array  
						
						
						
					 
					
						2015-11-03 18:45:54 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							09664177d7 
							
						 
					 
					
						
						
							
							* Fix tag handling in doc.merge, and assign sent_start when setting heads.  
						
						
						
					 
					
						2015-11-03 18:15:52 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							389a373807 
							
						 
					 
					
						
						
							
							Merge branch 'master' of ssh://github.com/honnibal/spaCy  
						
						
						
					 
					
						2015-11-03 18:07:25 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3f44b3e43f 
							
						 
					 
					
						
						
							
							* Mark serializer test as requiring models  
						
						
						
					 
					
						2015-11-03 18:07:08 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							25ed7be8f8 
							
						 
					 
					
						
						
							
							Merge branch 'master' of  https://github.com/honnibal/spaCy  
						
						
						
					 
					
						2015-11-03 07:58:17 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							604ceac4c6 
							
						 
					 
					
						
						
							
							* Fix morphological assignment in doc.merge()  
						
						
						
					 
					
						2015-11-03 17:57:51 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5e040855a5 
							
						 
					 
					
						
						
							
							* Ensure morphological features and lemmas are loaded in from_array, re Issue  #152  
						
						
						
					 
					
						2015-11-03 17:56:50 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5668feb235 
							
						 
					 
					
						
						
							
							* Fix pickle test for python3  
						
						
						
					 
					
						2015-11-03 04:57:02 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6161d2529a 
							
						 
					 
					
						
						
							
							Merge branch 'master' of ssh://github.com/honnibal/spaCy  
						
						
						
					 
					
						2015-11-03 13:36:30 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5887506f5d 
							
						 
					 
					
						
						
							
							* Don't expect lexemes.bin in Vocab  
						
						
						
					 
					
						2015-11-03 13:23:39 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f7dd377575 
							
						 
					 
					
						
						
							
							* Adjust conjuncts iterator in Token  
						
						
						
					 
					
						2015-11-03 13:23:22 +11:00 
						 
				 
			
				
					
						
							
							
								Andreas Grivas 
							
						 
					 
					
						
						
						
						
							
						
						
							d418f00eb1 
							
						 
					 
					
						
						
							
							fixed error when printing unicode  
						
						
						
					 
					
						2015-11-02 20:23:18 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							52fc338001 
							
						 
					 
					
						
						
							
							* Set is_parsed and is_tagged attrs when loading annotations into Doc, re Issue  #152  
						
						
						
					 
					
						2015-10-28 10:43:22 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							1c0356e4c2 
							
						 
					 
					
						
						
							
							* Set test file mode to w+t  
						
						
						
					 
					
						2015-10-26 22:40:48 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							0fe98f358b 
							
						 
					 
					
						
						
							
							* Fix mode on text file for Python3 in strings test  
						
						
						
					 
					
						2015-10-26 22:25:16 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8ba9cf905e 
							
						 
					 
					
						
						
							
							* Fix mode on text file for Python3 in strings test  
						
						
						
					 
					
						2015-10-26 21:44:34 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a0730699b1 
							
						 
					 
					
						
						
							
							* Fix mode on text file for Python3 in strings test  
						
						
						
					 
					
						2015-10-26 21:25:56 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							725344d349 
							
						 
					 
					
						
						
							
							* Fix tempfile in test  
						
						
						
					 
					
						2015-10-26 21:08:18 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f11030aadc 
							
						 
					 
					
						
						
							
							* Remove out-dated TODO comment  
						
						
						
					 
					
						2015-10-26 12:33:38 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a371a1071d 
							
						 
					 
					
						
						
							
							* Save and load word vectors during pickling, re Issue  #125  
						
						
						
					 
					
						2015-10-26 12:33:04 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a824a98312 
							
						 
					 
					
						
						
							
							* Add tests for pickling vectors, re: Issue  #125  
						
						
						
					 
					
						2015-10-26 12:31:05 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							314090cc78 
							
						 
					 
					
						
						
							
							* Set vectors length when unpickling vocab, re Issue  #125  
						
						
						
					 
					
						2015-10-26 12:05:08 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4e16f9e435 
							
						 
					 
					
						
						
							
							* Move tests underneath spacy/  
						
						
						
					 
					
						2015-10-26 00:07:31 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3a6e48e814 
							
						 
					 
					
						
						
							
							Merge pull request  #149  from chrisdubois/pickle-patch  
						
						... 
						
						
						
						Add __reduce__ to Tokenizer so that English pickles. 
						
					 
					
						2015-10-25 15:30:31 +11:00 
						 
				 
			
				
					
						
							
							
								Chris DuBois 
							
						 
					 
					
						
						
						
						
							
						
						
							dac8fe7bdb 
							
						 
					 
					
						
						
							
							Add __reduce__ to Tokenizer so that English pickles.  
						
						... 
						
						
						
						- Add tests to test_pickle and test_tokenizer that save to tempfiles. 
						
					 
					
						2015-10-23 22:24:03 -07:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ff4fe524ee 
							
						 
					 
					
						
						
							
							* Fix exception for python 2  
						
						
						
					 
					
						2015-10-23 01:56:13 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							341a3e85cd 
							
						 
					 
					
						
						
							
							* Upd downloaded data version  
						
						
						
					 
					
						2015-10-23 00:56:57 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f18fd8c659 
							
						 
					 
					
						
						
							
							* Fix language.py for change in StringStore load API  
						
						
						
					 
					
						2015-10-23 03:48:12 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							23855db3ca 
							
						 
					 
					
						
						
							
							Merge branch 'master' of ssh://github.com/honnibal/spaCy into develop  
						
						
						
					 
					
						2015-10-23 03:46:09 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4f13849065 
							
						 
					 
					
						
						
							
							Merge pull request  #145  from henningpeters/master  
						
						... 
						
						
						
						better error reporting, cleanup 
						
					 
					
						2015-10-23 03:45:47 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3be94be0c0 
							
						 
					 
					
						
						
							
							Merge pull request  #148  from maxirmx/master  
						
						... 
						
						
						
						Utf8 encoding for lemma_rules.json 
						
					 
					
						2015-10-22 21:46:28 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c86bda8d1a 
							
						 
					 
					
						
						
							
							* Fix import of uget  
						
						
						
					 
					
						2015-10-22 21:13:56 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2348a08481 
							
						 
					 
					
						
						
							
							* Load/dump strings with a json file, instead of the hacky strings file we were using.  
						
						
						
					 
					
						2015-10-22 21:13:03 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9baf0abd59 
							
						 
					 
					
						
						
							
							* Save vocab after training.  
						
						
						
					 
					
						2015-10-22 21:09:14 +11:00 
						 
				 
			
				
					
						
							
							
								maxirmx 
							
						 
					 
					
						
						
						
						
							
						
						
							f07e4accd7 
							
						 
					 
					
						
						
							
							Fixing encoding issue  #4  
						
						
						
					 
					
						2015-10-21 20:45:56 +03:00 
						 
				 
			
				
					
						
							
							
								maxirmx 
							
						 
					 
					
						
						
						
						
							
						
						
							fcbfff043f 
							
						 
					 
					
						
						
							
							Fixing encoding issue  #3  
						
						
						
					 
					
						2015-10-21 15:52:34 +03:00 
						 
				 
			
				
					
						
							
							
								maxirmx 
							
						 
					 
					
						
						
						
						
							
						
						
							fe9d2e2c4e 
							
						 
					 
					
						
						
							
							Fixing encode issue  #2  
						
						
						
					 
					
						2015-10-21 15:36:21 +03:00 
						 
				 
			
				
					
						
							
							
								maxirmx 
							
						 
					 
					
						
						
						
						
							
						
						
							e4a1726f77 
							
						 
					 
					
						
						
							
							Fixing encoding issue  
						
						... 
						
						
						
						UTF-8 
						
					 
					
						2015-10-21 14:16:37 +03:00 
						 
				 
			
				
					
						
							
							
								Andreas Grivas 
							
						 
					 
					
						
						
						
						
							
						
						
							93ada458e2 
							
						 
					 
					
						
						
							
							added __repr__ that prints text in ipython for doc, token, and span objects  
						
						
						
					 
					
						2015-10-21 14:11:46 +03:00 
						 
				 
			
				
					
						
							
							
								Henning Peters 
							
						 
					 
					
						
						
						
						
							
						
						
							ccffd2ef53 
							
						 
					 
					
						
						
							
							fixed extract directory  
						
						
						
					 
					
						2015-10-21 07:59:34 +02:00 
						 
				 
			
				
					
						
							
							
								Henning Peters 
							
						 
					 
					
						
						
						
						
							
						
						
							da4c9cee06 
							
						 
					 
					
						
						
							
							assert filename match  
						
						
						
					 
					
						2015-10-20 19:33:59 +02:00 
						 
				 
			
				
					
						
							
							
								Henning Peters 
							
						 
					 
					
						
						
						
						
							
						
						
							4f703f0cb4 
							
						 
					 
					
						
						
							
							better error reporting, cleanup  
						
						
						
					 
					
						2015-10-20 19:11:29 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9cdea6e450 
							
						 
					 
					
						
						
							
							* Import uget correctly  
						
						
						
					 
					
						2015-10-19 08:32:41 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6727a46bb5 
							
						 
					 
					
						
						
							
							* Fix Issue #118: Matcher behaves unpredictably when matches overlap.
						
						
						
					 
					
						2015-10-19 16:45:32 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							135062d23c 
							
						 
					 
					
						
						
							
							* Fix error with merged text when merged region did not have trailing whitespace  
						
						
						
					 
					
						2015-10-19 15:47:04 +11:00 
						 
				 
			
				
					
						
							
							
								Henning Peters 
							
						 
					 
					
						
						
						
						
							
						
						
							bfde91fa49 
							
						 
					 
					
						
						
							
							add custom download tool (uget), replace wget with uget  
						
						
						
					 
					
						2015-10-18 12:35:04 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9839cd2c0b 
							
						 
					 
					
						
						
							
							* Fix whitespace_ calculation in Token  
						
						
						
					 
					
						2015-10-18 17:21:11 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c99285b8b9 
							
						 
					 
					
						
						
							
							* Clean up C++ usage in spacy/matcher.pyx  
						
						
						
					 
					
						2015-10-18 17:20:50 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a7e6c5ac8f 
							
						 
					 
					
						
						
							
							* Fix Issue #122: Incorrect calculation of children after Doc.merge()
						
						
						
					 
					
						2015-10-18 17:17:27 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3ba66f2dc7 
							
						 
					 
					
						
						
							
							* Add string length cap in Tokenizer.__call__  
						
						
						
					 
					
						2015-10-16 04:54:16 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6e0f985afc 
							
						 
					 
					
						
						
							
							* Fix token.conjuncts  
						
						
						
					 
					
						2015-10-15 03:49:45 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2e0104ac81 
							
						 
					 
					
						
						
							
							* Fix token.conjuncts  
						
						
						
					 
					
						2015-10-15 03:47:45 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b8f3345a82 
							
						 
					 
					
						
						
							
							* Fix token.conjuncts method  
						
						
						
					 
					
						2015-10-15 03:36:01 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							23818f89b8 
							
						 
					 
					
						
						
							
							* Fix token.conjuncts method  
						
						
						
					 
					
						2015-10-15 03:34:57 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							7a15d1b60c 
							
						 
					 
					
						
						
							
							* Add Python 2/3 compatibility fix for copy_reg  
						
						
						
					 
					
						2015-10-13 20:04:40 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							329ae57520 
							
						 
					 
					
						
						
							
							* Fix whitespace attachment thing  
						
						
						
					 
					
						2015-10-13 09:46:38 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							37919eac82 
							
						 
					 
					
						
						
							
							* Fix whitespace attachment in simpler way. Leaves problem with setting left/right children.  
						
						
						
					 
					
						2015-10-13 18:23:24 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c70eb776ae 
							
						 
					 
					
						
						
							
							* Fix whitespace attachment, so that left/right children are consistent with head.  
						
						
						
					 
					
						2015-10-13 15:58:22 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							531182f937 
							
						 
					 
					
						
						
							
							* Fix Model.__reduce__  
						
						
						
					 
					
						2015-10-13 15:14:38 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6c227a6c1f 
							
						 
					 
					
						
						
							
							* Fix Model.__reduce__  
						
						
						
					 
					
						2015-10-13 15:10:04 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							358c82595c 
							
						 
					 
					
						
						
							
							* Fix NAMES list in spacy/parts_of_speech.pyx  
						
						
						
					 
					
						2015-10-13 14:18:45 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c1fdc487bc 
							
						 
					 
					
						
						
							
							Merge branch 'attrs'  
						
						
						
					 
					
						2015-10-13 14:03:41 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e886e6a406 
							
						 
					 
					
						
						
							
							* Inc version  
						
						
						
					 
					
						2015-10-13 13:46:17 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							20fd36a0f7 
							
						 
					 
					
						
						
							
							* Very scrappy, likely buggy first-cut pickle implementation, to work on Issue #125: allow pickle for Apache Spark. The current implementation sends stuff to temp files, and does almost nothing to ensure all modifiable state is actually preserved. The Language() instance is a deep tree of extension objects, and if pickling during training, some of the C-data state is hard to preserve.
						
						
						
					 
					
						2015-10-13 13:44:41 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f8de403483 
							
						 
					 
					
						
						
							
							* Work on pickling Vocab instances. The current implementation is not correct, but it may serve to see whether this approach is workable. Pickling is necessary to address Issue  #125  
						
						
						
					 
					
						2015-10-13 13:44:41 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							85e7944572 
							
						 
					 
					
						
						
							
							* Start trying to pickle Vocab  
						
						
						
					 
					
						2015-10-13 13:44:41 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5ca57bd859 
							
						 
					 
					
						
						
							
							* Ensure Morphology can be pickled, to address Issue #125.
						
						
						
					 
					
						2015-10-13 13:44:41 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							0cee928467 
							
						 
					 
					
						
						
							
							* Allow StringStore to be pickled, to start addressing Issue  #125  
						
						
						
					 
					
						2015-10-13 13:44:41 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							41012907a8 
							
						 
					 
					
						
						
							
							* Fix variable name  
						
						
						
					 
					
						2015-10-13 13:44:40 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e70368d157 
							
						 
					 
					
						
						
							
							* Use lower case strings for dependency label names in symbols enum  
						
						
						
					 
					
						2015-10-13 13:44:40 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							7b4af3d1e7 
							
						 
					 
					
						
						
							
							* Fix parts_of_speech now that symbols list has been reformed  
						
						
						
					 
					
						2015-10-13 13:44:40 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							37b909b6b6 
							
						 
					 
					
						
						
							
							* Use the symbols file in vocab instead of the symbols subfiles like attrs.pxd  
						
						
						
					 
					
						2015-10-13 13:44:40 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ce65ec698c 
							
						 
					 
					
						
						
							
							* Remove qualified naming in symbols  
						
						
						
					 
					
						2015-10-13 13:44:40 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9f4be0adcd 
							
						 
					 
					
						
						
							
							* Map NO_TAG to NIL in parts_of_speech.pxd  
						
						
						
					 
					
						2015-10-13 13:44:40 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							278e12f7e8 
							
						 
					 
					
						
						
							
							* Add morphology symbols to morphology. May need to remove these as an enum.  
						
						
						
					 
					
						2015-10-13 13:44:40 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							d80067eda1 
							
						 
					 
					
						
						
							
							* Map empty string to NULL_ATTR in attrs  
						
						
						
					 
					
						2015-10-13 13:44:40 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							d70e8cac2c 
							
						 
					 
					
						
						
							
							* Fix empty values in attributes and parts of speech, so symbols align correctly with the StringStore  
						
						
						
					 
					
						2015-10-13 13:44:40 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a29c8ee23d 
							
						 
					 
					
						
						
							
							* Add symbols to the vocab before reading the strings, so that they line up correctly  
						
						
						
					 
					
						2015-10-13 13:44:39 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							74c0853471 
							
						 
					 
					
						
						
							
							* Rename ATTR_IDS to attrs.IDS. Rename ATTR_NAMES to attrs.NAMES. Rename UNIV_POS_IDS to parts_of_speech.IDS  
						
						
						
					 
					
						2015-10-13 13:44:39 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							10a4a843ea 
							
						 
					 
					
						
						
							
							* Enumerate all symbols in one file  
						
						
						
					 
					
						2015-10-13 13:44:39 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							85ce36ab11 
							
						 
					 
					
						
						
							
							* Refactor symbols, so that frequency rank can be derived from the orth id of a word.  
						
						
						
					 
					
						2015-10-13 13:44:39 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							dfbcff2ff1 
							
						 
					 
					
						
						
							
							* Revert codecs/io change to strings.pyx, as it seemed to cause an error? Will investigate.  
						
						
						
					 
					
						2015-10-10 15:54:55 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9dd2f25c74 
							
						 
					 
					
						
						
							
							* Fix Issue  #131 : Force whitespace characters to attach syntactically to previous token, and ensure they cannot serve as stand-alone 'sentence' units.  
						
						
						
					 
					
						2015-10-10 15:53:30 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8b39feefbe 
							
						 
					 
					
						
						
							
							* Add dependency post-process rule to ensure spaces are attached to neighbouring tokens, so that they can't be sentence boundaries  
						
						
						
					 
					
						2015-10-10 15:32:13 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2153067958 
							
						 
					 
					
						
						
							
							* Fix use of io in strings.pyx  
						
						
						
					 
					
						2015-10-10 15:03:12 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ec874247b5 
							
						 
					 
					
						
						
							
							Merge branch 'master' of ssh://github.com/honnibal/spaCy  
						
						
						
					 
					
						2015-10-10 14:23:51 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							30de4135c9 
							
						 
					 
					
						
						
							
							* Fix merge problem  
						
						
						
					 
					
						2015-10-10 14:22:32 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							dc393a5f1d 
							
						 
					 
					
						
						
							
							Merge pull request  #126  from tomtung/master  
						
						... 
						
						
						
						Improve slicing support for both Doc and Span 
						
					 
					
						2015-10-10 14:14:57 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							83dccf0fd7 
							
						 
					 
					
						
						
							
							* Use io module instead of deprecated codecs module  
						
						
						
					 
					
						2015-10-10 14:13:01 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a3dfe2b901 
							
						 
					 
					
						
						
							
							* Increment data version  
						
						
						
					 
					
						2015-10-09 13:26:17 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2d9e5bf566 
							
						 
					 
					
						
						
							
							* Allow punctuation to be lemmatized  
						
						
						
					 
					
						2015-10-09 19:02:42 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5332c0b697 
							
						 
					 
					
						
						
							
							* Add support for punctuation lemmatization, to handle unicode characters. This should help in addressing Issue  #130  
						
						
						
					 
					
						2015-10-09 18:54:40 +11:00 
						 
				 
			
				
					
						
							
							
								Yubing (Tom) Dong 
							
						 
					 
					
						
						
						
						
							
						
						
							9a6811acc4 
							
						 
					 
					
						
						
							
							Merge remote-tracking branch 'upstream/master'  
						
						
						
					 
					
						2015-10-08 22:53:02 -07:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b125289f30 
							
						 
					 
					
						
						
							
							* Fix type declaration in asciied function  
						
						
						
					 
					
						2015-10-09 13:46:57 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							801d55a6d9 
							
						 
					 
					
						
						
							
							* Fix phrase matcher  
						
						
						
					 
					
						2015-10-09 02:00:45 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b3a70e6375 
							
						 
					 
					
						
						
							
							* Clean up unnecessary try/except block  
						
						
						
					 
					
						2015-10-08 14:34:11 +11:00 
						 
				 
			
				
					
						
							
							
								Yubing (Tom) Dong 
							
						 
					 
					
						
						
						
						
							
						
						
							0f601b8b75 
							
						 
					 
					
						
						
							
							Update docstring of Doc.__getitem__  
						
						
						
					 
					
						2015-10-07 01:27:28 -07:00 
						 
				 
			
				
					
						
							
							
								Yubing (Tom) Dong 
							
						 
					 
					
						
						
						
						
							
						
						
							3fd3bc79aa 
							
						 
					 
					
						
						
							
							Refactor to remove duplicate slicing logic  
						
						
						
					 
					
						2015-10-07 01:25:35 -07:00 
						 
				 
			
				
					
						
							
							
								Yubing (Tom) Dong 
							
						 
					 
					
						
						
						
						
							
						
						
							97685aecb7 
							
						 
					 
					
						
						
							
							Add slicing support to Span  
						
						
						
					 
					
						2015-10-06 02:45:49 -07:00 
						 
				 
			
				
					
						
							
							
								Yubing (Tom) Dong 
							
						 
					 
					
						
						
						
						
							
						
						
							ef2af20cd3 
							
						 
					 
					
						
						
							
							Make Doc's slicing behavior conform to Python conventions  
						
						
						
					 
					
						2015-10-06 02:41:28 -07:00 
						 
				 
			
				
					
						
							
							
								Yubing (Tom) Dong 
							
						 
					 
					
						
						
						
						
							
						
						
							2fc33e8024 
							
						 
					 
					
						
						
							
							Allow step=1 when slicing a Doc  
						
						
						
					 
					
						2015-10-06 00:57:05 -07:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b228a8f4a6 
							
						 
					 
					
						
						
							
							* Remove spacy/en/attrs  
						
						
						
					 
					
						2015-10-06 16:20:46 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							693677fd8d 
							
						 
					 
					
						
						
							
							* Prepare to remove en/attrx file, now that moving to symbols.pyx  
						
						
						
					 
					
						2015-10-06 16:20:13 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3d9f41c2c9 
							
						 
					 
					
						
						
							
							* Add LookupError for better error reporting in Vocab  
						
						
						
					 
					
						2015-10-06 10:34:59 +11:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ecc5281b36 
							
						 
					 
					
						
						
							
							* Remove en/pos.pyx, as the tagger code now lives in spacy/tagger.pyx  
						
						
						
					 
					
						2015-10-06 10:12:08 +11:00 
						 
				 
			
				
					
						
							
							
								alvations 
							
						 
					 
					
						
						
						
						
							
						
						
							8caedba42a 
							
						 
					 
					
						
						
							
							caught more codecs.open -> io.open  
						
						
						
					 
					
						2015-09-30 20:20:09 +02:00 
						 
				 
			
				
					
						
							
							
								alvations 
							
						 
					 
					
						
						
						
						
							
						
						
							8199012d26 
							
						 
					 
					
						
						
							
							changing deprecated codecs.open to io.open =)  
						
						
						
					 
					
						2015-09-30 20:10:15 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							87e6186828 
							
						 
					 
					
						
						
							
							* Rename _seq to doc attribute in Span  
						
						
						
					 
					
						2015-09-29 23:03:55 +10:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ab694b0364 
							
						 
					 
					
						
						
							
							* Fix open-bounded slice indices.  
						
						
						
					 
					
						2015-09-29 23:03:09 +10:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a6ced80c0c 
							
						 
					 
					
						
						
							
							* Fix Issue  #116 : Misleading handling of True value in Language.__init__.  
						
						
						
					 
					
						2015-09-29 20:54:12 +10:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f9d2a5b651 
							
						 
					 
					
						
						
							
							* Fix issue  #112 : Replace unidecode with text-unidecode, to avoid license problems.  
						
						
						
					 
					
						2015-09-28 23:40:18 +10:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2c33a96ac3 
							
						 
					 
					
						
						
							
							Merge pull request  #99  from rw/patch-1  
						
						... 
						
						
						
						Force SSL for downloading English language data. 
						
					 
					
						2015-09-28 17:46:26 +10:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							abf0d930af 
							
						 
					 
					
						
						
							
							* Fix API for loading word vectors from a file.  
						
						
						
					 
					
						2015-09-23 23:51:08 +10:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f5c256745b 
							
						 
					 
					
						
						
							
							Merge branch 'master' of ssh://github.com/honnibal/spaCy  
						
						
						
					 
					
						2015-09-22 12:26:24 +10:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							528e26a506 
							
						 
					 
					
						
						
							
							* Add rule to ensure ordinals are preserved as single tokens  
						
						
						
					 
					
						2015-09-22 12:26:05 +10:00 
						 
				 
			
				
					
						
							
							
								Robert 
							
						 
					 
					
						
						
						
						
							
						
						
							8711b64860 
							
						 
					 
					
						
						
							
							Force SSL for downloading English language data.  
						
						... 
						
						
						
						It would also be nice to have a checksum for this. 
						
					 
					
						2015-09-21 17:26:01 -07:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f7283a5067 
							
						 
					 
					
						
						
							
							* Fix vectors bugs for OOV words  
						
						
						
					 
					
						2015-09-22 02:10:25 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							44aecba701 
							
						 
					 
					
						
						
							
							* Fix Token.has_vector and Lexeme.has_vector  
						
						
						
					 
					
						2015-09-22 01:43:16 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							596fde8daa 
							
						 
					 
					
						
						
							
							* Add has_vector attribute to Token and Lexeme  
						
						
						
					 
					
						2015-09-21 19:52:43 +10:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f32927efbf 
							
						 
					 
					
						
						
							
							* Raise exceptions if attempt to access parse, but data is not installed. This partly but not fully addresses Issue  #97 . Still need exceptions on the various Token attributes that access the parse tree, e.g. token.head, token.lefts, token.rights, etc. Exceptions should be centralized, too.  
						
						
						
					 
					
						2015-09-21 18:35:40 +10:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							388062ae01 
							
						 
					 
					
						
						
							
							* Fix repvec_length problem  
						
						
						
					 
					
						2015-09-21 18:10:51 +10:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ac459278d1 
							
						 
					 
					
						
						
							
							* Fix vector length error reporting, and ensure vec_len is returned  
						
						
						
					 
					
						2015-09-21 18:08:32 +10:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ba4e563701 
							
						 
					 
					
						
						
							
							* Ensure vectors are same length, and return vector length in load_vectors_bz2  
						
						
						
					 
					
						2015-09-21 18:03:08 +10:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							d00fe2bbc6 
							
						 
					 
					
						
						
							
							* Don't allow Span objects to be written to, as it introduces subtle bugs because they're created afresh from Doc.sents, Doc.ents etc.  
						
						
						
					 
					
						2015-09-21 17:59:39 +10:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							d6945bf880 
							
						 
					 
					
						
						
							
							* Add way to load vectors from bz2 file to vocab  
						
						
						
					 
					
						2015-09-17 12:58:23 +10:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							77856c4fcd 
							
						 
					 
					
						
						
							
							* Try giving Doc and Span objects vector and vector_norm attributes, and .similarity functions. Turns out to be bad idea.  
						
						
						
					 
					
						2015-09-17 11:50:11 +10:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							191d593e03 
							
						 
					 
					
						
						
							
							* Fix vectors bug in lexeme  
						
						
						
					 
					
						2015-09-15 19:05:11 +10:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3d87519f64 
							
						 
					 
					
						
						
							
							* Remove vectors argument from Vocab object  
						
						
						
					 
					
						2015-09-15 14:47:14 +10:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							362526b592 
							
						 
					 
					
						
						
							
							* Rename vectors_length attribute  
						
						
						
					 
					
						2015-09-15 14:43:31 +10:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							60c26b2dfa 
							
						 
					 
					
						
						
							
							* Fix slicing when start or stop is None  
						
						
						
					 
					
						2015-09-15 14:43:10 +10:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							7ac6cacc26 
							
						 
					 
					
						
						
							
							* Remove const qualifier on LexemeC.repvec  
						
						
						
					 
					
						2015-09-15 14:42:51 +10:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							dd4d64b235 
							
						 
					 
					
						
						
							
							* Support setting of word vectors on Lexeme object.  
						
						
						
					 
					
						2015-09-15 14:42:27 +10:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							27f988b167 
							
						 
					 
					
						
						
							
							* Remove the vectors option to Vocab, preferring to either load vectors from disk, or set them on the Lexeme objects.  
						
						
						
					 
					
						2015-09-15 14:41:48 +10:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							193f127f81 
							
						 
					 
					
						
						
							
							* Fix ugly py_check_flag and py_set_flag functions in Lexeme  
						
						
						
					 
					
						2015-09-15 13:06:18 +10:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9561d88529 
							
						 
					 
					
						
						
							
							* Add is_stop to Python API  
						
						
						
					 
					
						2015-09-14 18:25:40 +10:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							65dc0d1dfb 
							
						 
					 
					
						
						
							
							* Extend word vectors support, with .similarity() function, vector_norm property, and rename repvec to vector. Keep repvec name as well for now for backwards compatibility.  
						
						
						
					 
					
						2015-09-14 17:49:58 +10:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e13e47e9e5 
							
						 
					 
					
						
						
							
							* Add English stop words  
						
						
						
					 
					
						2015-09-14 17:48:51 +10:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							24ed3fc25c 
							
						 
					 
					
						
						
							
							* Check file existence before opening in lemmatizer  
						
						
						
					 
					
						2015-09-13 10:45:21 +10:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							dbb48ce49e 
							
						 
					 
					
						
						
							
							* Delete extra wordnets  
						
						
						
					 
					
						2015-09-13 10:31:37 +10:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e9c59693ea 
							
						 
					 
					
						
						
							
							* Remove assertion from vocab.pyx  
						
						
						
					 
					
						2015-09-13 10:30:08 +10:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c08f10083c 
							
						 
					 
					
						
						
							
							* Add test and test_with_ws attributes.  
						
						
						
					 
					
						2015-09-13 10:27:42 +10:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							0b7d2a6c62 
							
						 
					 
					
						
						
							
							* Inc version  
						
						
						
					 
					
						2015-09-13 01:26:29 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e1dfaeed8a 
							
						 
					 
					
						
						
							
							* Check serializer freqs exist before loading  
						
						
						
					 
					
						2015-09-12 23:49:38 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a412c66c8c 
							
						 
					 
					
						
						
							
							* Check serializer freqs exist before loading  
						
						
						
					 
					
						2015-09-12 23:40:01 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							631c843ed1 
							
						 
					 
					
						
						
							
							* Don't look for index.adv in lemmatizer  
						
						
						
					 
					
						2015-09-12 06:03:44 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							dfdd4f2d60 
							
						 
					 
					
						
						
							
							Merge branch 'develop' of  https://github.com/honnibal/spaCy  into develop  
						
						
						
					 
					
						2015-09-10 15:23:06 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e285ca7d6c 
							
						 
					 
					
						
						
							
							* Load serializer freqs in vocab  
						
						
						
					 
					
						2015-09-10 15:22:48 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f7fdcce1f9 
							
						 
					 
					
						
						
							
							Merge branch 'develop' of  https://github.com/honnibal/spaCy  into develop  
						
						
						
					 
					
						2015-09-10 14:52:47 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							85c3fec1d1 
							
						 
					 
					
						
						
							
							* Fix morphology loading  
						
						
						
					 
					
						2015-09-10 14:52:23 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							7c660c5efc 
							
						 
					 
					
						
						
							
							* Use dict.get in lemmatizer  
						
						
						
					 
					
						2015-09-10 14:51:39 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							094440f9f5 
							
						 
					 
					
						
						
							
							Merge branch 'develop' of ssh://github.com/honnibal/spaCy into develop  
						
						
						
					 
					
						2015-09-10 14:51:17 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c3f773cd63 
							
						 
					 
					
						
						
							
							* Fix Lexeme.check_flag  
						
						
						
					 
					
						2015-09-10 14:51:05 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							90da3a695d 
							
						 
					 
					
						
						
							
							* Load lemmatizer from disk in Vocab.from_dir  
						
						
						
					 
					
						2015-09-10 14:49:10 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e7e529edf4 
							
						 
					 
					
						
						
							
							* Fix Lexeme.check_flag  
						
						
						
					 
					
						2015-09-10 14:45:43 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9e7bfe8449 
							
						 
					 
					
						
						
							
							* Fix space at end of merged token  
						
						
						
					 
					
						2015-09-10 14:45:17 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f634191e27 
							
						 
					 
					
						
						
							
							* Fix vocab read/write  
						
						
						
					 
					
						2015-09-10 14:44:38 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							31ccf494e6 
							
						 
					 
					
						
						
							
							Merge branch 'develop' of  https://github.com/honnibal/spaCy  into develop  
						
						
						
					 
					
						2015-09-09 14:33:38 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a7f4b26c8c 
							
						 
					 
					
						
						
							
							* Tmp  
						
						
						
					 
					
						2015-09-09 14:33:26 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							07686470a9 
							
						 
					 
					
						
						
							
							* Don't consider a coordinated NP a base chunk  
						
						
						
					 
					
						2015-09-09 14:32:28 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							d9f1fc2112 
							
						 
					 
					
						
						
							
							* Add deprecation warning for unused load_vectors argument.  
						
						
						
					 
					
						2015-09-09 14:31:09 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							0b527fbdc8 
							
						 
					 
					
						
						
							
							* Set POS tag in morphology  
						
						
						
					 
					
						2015-09-09 14:30:24 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							07c09a0e1b 
							
						 
					 
					
						
						
							
							* Fix attribute getters and setters in Lexeme  
						
						
						
					 
					
						2015-09-09 14:29:22 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							d6561988cf 
							
						 
					 
					
						
						
							
							* Fix lexemes.bin  
						
						
						
					 
					
						2015-09-09 11:49:51 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c301bebd33 
							
						 
					 
					
						
						
							
							Merge branch 'master' of  https://github.com/honnibal/spaCy  into develop  
						
						
						
					 
					
						2015-09-09 10:55:39 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							0e24d099a1 
							
						 
					 
					
						
						
							
							* Fix L/R edge bug, by ensuring l_edge and r_edge are preset, and fixing the way the edge update in del_arc. Bugs keep arising here because the edges are absolute positions, where everything else is relative. I'm also not 100% convinced that del_arc is handled correctly. Do we need to update the parents?  
						
						
						
					 
					
						2015-09-09 03:40:44 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2be3620333 
							
						 
					 
					
						
						
							
							* Save morphological analyses in a cache  
						
						
						
					 
					
						2015-09-08 15:39:24 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							1def5a6cbe 
							
						 
					 
					
						
						
							
							* Fix print statements in matcher  
						
						
						
					 
					
						2015-09-08 15:38:19 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							64d71f8893 
							
						 
					 
					
						
						
							
							* Fix lemmatizer  
						
						
						
					 
					
						2015-09-08 15:38:03 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							623329b19a 
							
						 
					 
					
						
						
							
							Merge branch 'master' of ssh://github.com/honnibal/spaCy into develop  
						
						
						
					 
					
						2015-09-08 14:27:01 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							62a01dd41d 
							
						 
					 
					
						
						
							
							* Fix issue  #92 : lexemes.bin read error on 32-bit platforms.  
						
						
						
					 
					
						2015-09-08 14:23:58 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ef58607a99 
							
						 
					 
					
						
						
							
							* Add spacy.it  
						
						
						
					 
					
						2015-09-06 22:10:37 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2154a54f6b 
							
						 
					 
					
						
						
							
							* Add spacy.de  
						
						
						
					 
					
						2015-09-06 21:56:47 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f6ec5bf1b0 
							
						 
					 
					
						
						
							
							* Use empty tag map in vocab if none supplied  
						
						
						
					 
					
						2015-09-06 20:19:27 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4f8e38271d 
							
						 
					 
					
						
						
							
							* Fix merge errors in lexeme.pxd  
						
						
						
					 
					
						2015-09-06 20:19:08 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							86c888667f 
							
						 
					 
					
						
						
							
							* Merge in changes from de branch  
						
						
						
					 
					
						2015-09-06 19:49:28 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							d2fc104a26 
							
						 
					 
					
						
						
							
							* Begin merge of Gazetteer and DE branches  
						
						
						
					 
					
						2015-09-06 19:45:15 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							dbf8dce109 
							
						 
					 
					
						
						
							
							Merge branch 'gaz' of ssh://github.com/honnibal/spaCy into gaz  
						
						
						
					 
					
						2015-09-06 18:44:14 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9eae9837c4 
							
						 
					 
					
						
						
							
							* Fix morphology look up  
						
						
						
					 
					
						2015-09-06 17:53:39 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6427a3fcac 
							
						 
					 
					
						
						
							
							* Temporarily import flag attributes in matcher  
						
						
						
					 
					
						2015-09-06 17:53:12 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							7cc56ada6e 
							
						 
					 
					
						
						
							
							* Temporarily add py_set_flag attribute in Lexeme  
						
						
						
					 
					
						2015-09-06 17:52:51 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e35bb36be7 
							
						 
					 
					
						
						
							
							* Ensure Lexeme.check_flag returns a boolean value  
						
						
						
					 
					
						2015-09-06 17:52:32 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							7e4fea67d3 
							
						 
					 
					
						
						
							
							* Fix bug in token subtree, introduced by duplication of L/R code in Stateclass. Need to consolidate the two methods.  
						
						
						
					 
					
						2015-09-06 10:48:36 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5edac11225 
							
						 
					 
					
						
						
							
							* Wrap self.parse in nogil, and break if an invalid move is predicted. The invalid break is a work-around that papers over likely bugs, but we can't easily break in the nogil block, and otherwise we'll get an infinite loop. Need to set this as an error flag.  
						
						
						
					 
					
						2015-09-06 04:15:00 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							fd1eeb3102 
							
						 
					 
					
						
						
							
							* Add POS attribute support in get_attr  
						
						
						
					 
					
						2015-09-06 04:13:03 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							534e3dda3c 
							
						 
					 
					
						
						
							
							* More work on language independent parsing  
						
						
						
					 
					
						2015-08-28 03:44:54 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c2307fa9ee 
							
						 
					 
					
						
						
							
							* More work on language-generic parsing  
						
						
						
					 
					
						2015-08-28 02:02:33 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							86c4a8e3e2 
							
						 
					 
					
						
						
							
							* Work on new morphology organization  
						
						
						
					 
					
						2015-08-27 23:11:51 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5b89e2454c 
							
						 
					 
					
						
						
							
							* Improve error-reporting in tagger  
						
						
						
					 
					
						2015-08-27 10:26:36 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f0a7c99554 
							
						 
					 
					
						
						
							
							* Relax rule-requirement in lemmatizer  
						
						
						
					 
					
						2015-08-27 10:26:19 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							0af139e183 
							
						 
					 
					
						
						
							
							* Tagger training now working. Still need to test load/save of model. Morphology still broken.  
						
						
						
					 
					
						2015-08-27 09:16:11 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							1302d35dff 
							
						 
					 
					
						
						
							
							* Rework interfaces in vocab  
						
						
						
					 
					
						2015-08-26 19:21:46 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2d521768a3 
							
						 
					 
					
						
						
							
							* Store Morphology class in Vocab  
						
						
						
					 
					
						2015-08-26 19:21:03 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							d30029979e 
							
						 
					 
					
						
						
							
							* Avoid import of morphology in spans  
						
						
						
					 
					
						2015-08-26 19:20:46 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							119c0f8c3f 
							
						 
					 
					
						
						
							
							* Hack out morphology stuff from tokenizer, while morphology being reimplemented.  
						
						
						
					 
					
						2015-08-26 19:20:11 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b4faf551f5 
							
						 
					 
					
						
						
							
							* Refactor language-independent tagger class  
						
						
						
					 
					
						2015-08-26 19:19:21 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a3d5e6c0dd 
							
						 
					 
					
						
						
							
							* Reform constructor and save/load workflow in parser model  
						
						
						
					 
					
						2015-08-26 19:19:01 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							1d7f2d3abc 
							
						 
					 
					
						
						
							
							* Hack on morphology structs  
						
						
						
					 
					
						2015-08-26 19:18:36 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f8f2f4e545 
							
						 
					 
					
						
						
							
							* Temporarily add PUNC name to parts_of_specch dictionary, until better solution  
						
						
						
					 
					
						2015-08-26 19:18:19 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							008b02b035 
							
						 
					 
					
						
						
							
							* Comment out enums in Morpohlogy for now  
						
						
						
					 
					
						2015-08-26 19:17:35 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							378729f81a 
							
						 
					 
					
						
						
							
							* Hack Morphology class towards usability  
						
						
						
					 
					
						2015-08-26 19:17:21 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							430affc347 
							
						 
					 
					
						
						
							
							* Fix missing n_patterns property in Matcher class. Fix from_dir method  
						
						
						
					 
					
						2015-08-26 19:17:02 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3acf60df06 
							
						 
					 
					
						
						
							
							* Add missing properties in Lexeme class  
						
						
						
					 
					
						2015-08-26 19:16:28 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							76996f4145 
							
						 
					 
					
						
						
							
							* Hack on generic Language class. Still needs work for morphology, defaults, etc  
						
						
						
					 
					
						2015-08-26 19:16:09 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e2ef78b29c 
							
						 
					 
					
						
						
							
							* Gut pos.pyx module, since functionality moved to spacy/tagger.pyx  
						
						
						
					 
					
						2015-08-26 19:15:42 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c4d8754385 
							
						 
					 
					
						
						
							
							* Specify LOCAL_DATA_DIR global in spacy.en.__init__.py  
						
						
						
					 
					
						2015-08-26 19:15:07 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c2d8edd0bd 
							
						 
					 
					
						
						
							
							* Add PROB attribute in attrs.pxd  
						
						
						
					 
					
						2015-08-26 19:14:19 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c5a27d1821 
							
						 
					 
					
						
						
							
							* Move lemmatizer to spacy  
						
						
						
					 
					
						2015-08-25 15:47:08 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							82217c6ec6 
							
						 
					 
					
						
						
							
							* Generalize lemmatizer  
						
						
						
					 
					
						2015-08-25 15:46:19 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8083a07c3e 
							
						 
					 
					
						
						
							
							* Use language base class  
						
						
						
					 
					
						2015-08-25 15:37:30 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f2f699ac18 
							
						 
					 
					
						
						
							
							* Add language base class  
						
						
						
					 
					
						2015-08-25 15:37:17 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5dd76be446 
							
						 
					 
					
						
						
							
							* Split EnPosTagger up into base class and subclass  
						
						
						
					 
					
						2015-08-24 05:25:55 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5d5922dbfa 
							
						 
					 
					
						
						
							
							* Begin laying out morphological features  
						
						
						
					 
					
						2015-08-24 01:04:30 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6f1743692a 
							
						 
					 
					
						
						
							
							* Work on language-independent refactoring  
						
						
						
					 
					
						2015-08-23 20:49:18 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3879d28457 
							
						 
					 
					
						
						
							
							* Fix https for url detection  
						
						
						
					 
					
						2015-08-23 02:40:35 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							cad0cca4e3 
							
						 
					 
					
						
						
							
							* Tmp  
						
						
						
					 
					
						2015-08-22 22:04:34 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							bf38b3b883 
							
						 
					 
					
						
						
							
							* Hack on l/r reversal bug  
						
						
						
					 
					
						2015-08-10 05:58:43 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6116413b47 
							
						 
					 
					
						
						
							
							* Fix label prediction in StepwiseState  
						
						
						
					 
					
						2015-08-10 05:05:31 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2c9753eff2 
							
						 
					 
					
						
						
							
							* Whitespace  
						
						
						
					 
					
						2015-08-10 00:09:02 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9de98f5a6f 
							
						 
					 
					
						
						
							
							* Add Parser.stepthrough method, with context manager  
						
						
						
					 
					
						2015-08-10 00:08:46 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							fe43f8cf39 
							
						 
					 
					
						
						
							
							* Whitespace  
						
						
						
					 
					
						2015-08-09 02:31:53 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9c090945e0 
							
						 
					 
					
						
						
							
							* Add Parser.predict method, and clean up Parser.get_state  
						
						
						
					 
					
						2015-08-09 02:29:58 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							04fccfb984 
							
						 
					 
					
						
						
							
							* Fix get_state for parser prediction  
						
						
						
					 
					
						2015-08-09 02:11:22 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							55fde0e240 
							
						 
					 
					
						
						
							
							* Fix get_state  
						
						
						
					 
					
						2015-08-09 01:45:30 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f0f4fa9838 
							
						 
					 
					
						
						
							
							* Fix Parser.get_state  
						
						
						
					 
					
						2015-08-09 01:40:13 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							18331dca89 
							
						 
					 
					
						
						
							
							* Add continue_for argument to parser 'partial' function, which is now renamed to get_state  
						
						
						
					 
					
						2015-08-09 01:31:54 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							0653288fa5 
							
						 
					 
					
						
						
							
							* Fix stateclass.queue  
						
						
						
					 
					
						2015-08-09 00:39:02 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9de218b7ba 
							
						 
					 
					
						
						
							
							* Fix Parser.partial function  
						
						
						
					 
					
						2015-08-08 23:45:18 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							01be34d55a 
							
						 
					 
					
						
						
							
							* Whitespace  
						
						
						
					 
					
						2015-08-08 23:37:44 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							cc9deae960 
							
						 
					 
					
						
						
							
							* Add is_valid method to transition_system  
						
						
						
					 
					
						2015-08-08 23:36:18 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2a46c77324 
							
						 
					 
					
						
						
							
							* Whitespace  
						
						
						
					 
					
						2015-08-08 23:35:59 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							7bafc789e7 
							
						 
					 
					
						
						
							
							* Add stack and queue properties to stateclass, for python access  
						
						
						
					 
					
						2015-08-08 23:32:42 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3af938365f 
							
						 
					 
					
						
						
							
							* Add function partial to Parser  
						
						
						
					 
					
						2015-08-08 23:32:15 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							76a1f0481a 
							
						 
					 
					
						
						
							
							* Whitespace  
						
						
						
					 
					
						2015-08-08 23:31:54 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b0f5c39084 
							
						 
					 
					
						
						
							
							* Fix handling of exclusion entities  
						
						
						
					 
					
						2015-08-06 17:28:43 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9f65879991 
							
						 
					 
					
						
						
							
							* Fix shape attr bug, and fix handling of false positive matches  
						
						
						
					 
					
						2015-08-06 17:28:14 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							10d869d102 
							
						 
					 
					
						
						
							
							* Don't allow conjunction between NPs in base NP chunks  
						
						
						
					 
					
						2015-08-06 16:31:53 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							383dfabd67 
							
						 
					 
					
						
						
							
							* Fix matcher setting of entities  
						
						
						
					 
					
						2015-08-06 16:27:01 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							59c3bf60a6 
							
						 
					 
					
						
						
							
							* Ensure entity recognizer doesn't over-write preset types  
						
						
						
					 
					
						2015-08-06 16:09:08 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							cd7d1682cd 
							
						 
					 
					
						
						
							
							* Fix loading of gazetteer.json file  
						
						
						
					 
					
						2015-08-06 16:08:25 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9c667b7f15 
							
						 
					 
					
						
						
							
							* Set a value in attrs.pxd on the first flag, to reduce bugs  
						
						
						
					 
					
						2015-08-06 16:08:04 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c263577424 
							
						 
					 
					
						
						
							
							* Fix lower attribute in lexeme.pxd  
						
						
						
					 
					
						2015-08-06 16:07:41 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5737115e1e 
							
						 
					 
					
						
						
							
							* Work on gazetteer matching  
						
						
						
					 
					
						2015-08-06 14:33:21 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9c1724ecae 
							
						 
					 
					
						
						
							
							* Gazetteer stuff working, now need to wire up to API  
						
						
						
					 
					
						2015-08-06 00:35:40 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5bc0e83f9a 
							
						 
					 
					
						
						
							
							* Reimplement matching in Cython, instead of Python.  
						
						
						
					 
					
						2015-08-05 01:05:54 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4c87a696b3 
							
						 
					 
					
						
						
							
							* Add draft dfa matcher, in Python. Passing tests.  
						
						
						
					 
					
						2015-08-04 15:55:28 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							eb7138c761 
							
						 
					 
					
						
						
							
							* Add attr relation in base NP detection  
						
						
						
					 
					
						2015-08-01 00:34:40 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4988356cf0 
							
						 
					 
					
						
						
							
							* Fix dependency type bug from merged tokens  
						
						
						
					 
					
						2015-08-01 00:33:24 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							78a9068319 
							
						 
					 
					
						
						
							
							* Fix spacy attr on merged tokens  
						
						
						
					 
					
						2015-07-30 04:25:58 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							430e2edb96 
							
						 
					 
					
						
						
							
							* Fix noun_chunks issue  
						
						
						
					 
					
						2015-07-30 03:51:50 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9590968fc1 
							
						 
					 
					
						
						
							
							* Fix negative indices in Span  
						
						
						
					 
					
						2015-07-30 02:30:24 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							74d8cb3980 
							
						 
					 
					
						
						
							
							* Add noun_chunks iterator, and fix left/right child setting in Doc.merge  
						
						
						
					 
					
						2015-07-30 02:29:49 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							d153f18969 
							
						 
					 
					
						
						
							
							* Fix negative indices on spans  
						
						
						
					 
					
						2015-07-29 22:36:03 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b5132bed7d 
							
						 
					 
					
						
						
							
							* Set left and right children when loading parse from byte string  
						
						
						
					 
					
						2015-07-28 21:03:18 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6609fcf4b2 
							
						 
					 
					
						
						
							
							* Make mem and vocab python-visible in Doc  
						
						
						
					 
					
						2015-07-28 20:46:59 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							d42fe2e694 
							
						 
					 
					
						
						
							
							* Add unicode_literals to strings.pyx  
						
						
						
					 
					
						2015-07-28 16:15:53 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							bb910cff92 
							
						 
					 
					
						
						
							
							* Fix Python3 problem in align_raw  
						
						
						
					 
					
						2015-07-28 16:06:53 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							dcafb181b9 
							
						 
					 
					
						
						
							
							* Fix Python3 problem in align_raw  
						
						
						
					 
					
						2015-07-28 15:52:10 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c609ea18f0 
							
						 
					 
					
						
						
							
							* Increment version in download script  
						
						
						
					 
					
						2015-07-28 15:22:17 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9c4d0aae62 
							
						 
					 
					
						
						
							
							* Switch to better Python2/3 compatible unicode handling  
						
						
						
					 
					
						2015-07-28 14:45:37 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							7606d9936f 
							
						 
					 
					
						
						
							
							* Python3 correction for GoldParse  
						
						
						
					 
					
						2015-07-28 14:44:53 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ddc1a5cfe5 
							
						 
					 
					
						
						
							
							* Fix training under python3  
						
						
						
					 
					
						2015-07-28 14:09:30 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a8bbd7312c 
							
						 
					 
					
						
						
							
							* Hackishly patch long dependencies problem  
						
						
						
					 
					
						2015-07-28 00:14:29 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							bb583f7f09 
							
						 
					 
					
						
						
							
							* Hackishly patch long dependencies problem  
						
						
						
					 
					
						2015-07-27 23:14:33 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							aa7a964a4f 
							
						 
					 
					
						
						
							
							* Add a type declaration for doc.from_array  
						
						
						
					 
					
						2015-07-27 22:57:22 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							25a8774f42 
							
						 
					 
					
						
						
							
							* Fix regression in packer  
						
						
						
					 
					
						2015-07-27 21:53:38 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							1601e488ee 
							
						 
					 
					
						
						
							
							* Fix bug in decoding non-ascii characters  
						
						
						
					 
					
						2015-07-27 21:43:58 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6a95409cd2 
							
						 
					 
					
						
						
							
							* Fix type on bits  
						
						
						
					 
					
						2015-07-27 21:16:49 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a296d72b54 
							
						 
					 
					
						
						
							
							* Fix en/attrs  
						
						
						
					 
					
						2015-07-27 21:16:33 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							45460f505c 
							
						 
					 
					
						
						
							
							* Fix data type on read32 in BitArray  
						
						
						
					 
					
						2015-07-27 21:12:13 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3d43f49f69 
							
						 
					 
					
						
						
							
							* Revert prev change  
						
						
						
					 
					
						2015-07-27 10:58:15 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6b586cdad4 
							
						 
					 
					
						
						
							
							* Change lexemes.bin format. Add a header specifying size of LexemeC and number of lexemes, and don't have the redundant orth information.  
						
						
						
					 
					
						2015-07-27 08:31:51 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							af6ed18f2a 
							
						 
					 
					
						
						
							
							* Ensure we don't use orth_encode on OOV words.  
						
						
						
					 
					
						2015-07-27 02:12:01 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8535d872e8 
							
						 
					 
					
						
						
							
							* Set is_oov property in get_flags  
						
						
						
					 
					
						2015-07-27 01:51:24 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8e4c69ee8c 
							
						 
					 
					
						
						
							
							* Add is_oov property, and fix up handling of attributes  
						
						
						
					 
					
						2015-07-27 01:50:06 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							fc268f03eb 
							
						 
					 
					
						
						
							
							* Assert against null pointer exceptions in vocab  
						
						
						
					 
					
						2015-07-27 01:00:10 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							0f093fdb30 
							
						 
					 
					
						
						
							
							* Fix get_by_orth for py3  
						
						
						
					 
					
						2015-07-26 19:26:41 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ceeda5a739 
							
						 
					 
					
						
						
							
							* Fix get_by_orth for py3  
						
						
						
					 
					
						2015-07-26 18:39:27 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6bb96c122d 
							
						 
					 
					
						
						
							
							* Host IS_ flags in attrs.pxd, and add properties for them on Token and Lexeme objects  
						
						
						
					 
					
						2015-07-26 16:37:16 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							eeaea25f0c 
							
						 
					 
					
						
						
							
							* Check oov_prob file is present  
						
						
						
					 
					
						2015-07-26 16:36:38 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							7eb2446082 
							
						 
					 
					
						
						
							
							* Return empty lexeme on empty string  
						
						
						
					 
					
						2015-07-26 00:18:30 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							1b5d1da2a7 
							
						 
					 
					
						
						
							
							* Allow an OOV probability to be specified in get_lex_props  
						
						
						
					 
					
						2015-07-26 00:03:43 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							cd6e25132b 
							
						 
					 
					
						
						
							
							* Allow an OOV probability to be specified in get_lex_props  
						
						
						
					 
					
						2015-07-26 00:01:46 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							fd525f0675 
							
						 
					 
					
						
						
							
							* Pass OOV probability around  
						
						
						
					 
					
						2015-07-25 23:29:51 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3fe14b8ed6 
							
						 
					 
					
						
						
							
							* Fix CFile for Python2  
						
						
						
					 
					
						2015-07-25 22:55:53 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							823ef4a00b 
							
						 
					 
					
						
						
							
							* Remove profile declarations  
						
						
						
					 
					
						2015-07-25 18:13:06 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f4809e562f 
							
						 
					 
					
						
						
							
							* Allow json to be used as a fallback if ujson is not available  
						
						
						
					 
					
						2015-07-25 18:11:36 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9da06671cf 
							
						 
					 
					
						
						
							
							* Remove unused import  
						
						
						
					 
					
						2015-07-25 18:11:16 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2060935cdb 
							
						 
					 
					
						
						
							
							* Remove explicit bytes type in doc.from_bytes, to accept bytearray  
						
						
						
					 
					
						2015-07-24 04:54:13 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							aa28e2e01d 
							
						 
					 
					
						
						
							
							* Release the GIL around parse function  
						
						
						
					 
					
						2015-07-24 04:53:27 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							d62eb34b76 
							
						 
					 
					
						
						
							
							* More Py 2/3 compatibility in bit strings  
						
						
						
					 
					
						2015-07-24 04:52:06 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							0bb839d299 
							
						 
					 
					
						
						
							
							* Fix string coercion for Python 3  
						
						
						
					 
					
						2015-07-24 03:49:30 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c4ff410fdb 
							
						 
					 
					
						
						
							
							* Fix bytes problems for Python3  
						
						
						
					 
					
						2015-07-24 03:48:23 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							1ab25e4dad 
							
						 
					 
					
						
						
							
							* Fix python3 type error  
						
						
						
					 
					
						2015-07-24 02:45:34 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f35ff173b0 
							
						 
					 
					
						
						
							
							* Fix bits.pyx unicode error  
						
						
						
					 
					
						2015-07-23 20:37:57 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							1406e24327 
							
						 
					 
					
						
						
							
							* Fix unicode error for Python3  
						
						
						
					 
					
						2015-07-23 19:36:21 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							dbda6c27fa 
							
						 
					 
					
						
						
							
							* Fix python3 error  
						
						
						
					 
					
						2015-07-23 14:52:30 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							99387f9572 
							
						 
					 
					
						
						
							
							* Fix python3 error  
						
						
						
					 
					
						2015-07-23 14:30:29 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b81ffe9032 
							
						 
					 
					
						
						
							
							* Fix typing on mode string in CFile  
						
						
						
					 
					
						2015-07-23 13:24:43 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							22028602a9 
							
						 
					 
					
						
						
							
							* Add unicode_literals declaration in vocab.pyx  
						
						
						
					 
					
						2015-07-23 13:24:20 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5b41744270 
							
						 
					 
					
						
						
							
							* Check for directory presence before loading annotators  
						
						
						
					 
					
						2015-07-23 09:27:37 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							df01a88763 
							
						 
					 
					
						
						
							
							Merge branch 'refactor' (and serialization)  
						
						... 
						
						
						
						Add Huffman-code serialization, and do a lot of
refactoring. Highlights include:
* Much more efficient StringStore
* Vocab maintains a by-orth mapping of Lexemes
* Avoid manually slicing Py_UNICODE buffers,
  simplifying tokenizer and vocab C APIs
* Remove various bits of dead code
* Work on removing GIL around parser
* Work on bridge to Theano
Conflicts:
	spacy/strings.pxd
	spacy/strings.pyx
	spacy/structs.pxd 
						
					 
					
						2015-07-23 02:18:35 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a7c4d72e83 
							
						 
					 
					
						
						
							
							* Add serializer property to Vocab, and lazy-load it. Add get_by_orth method.  
						
						
						
					 
					
						2015-07-23 01:18:19 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6ab1696b15 
							
						 
					 
					
						
						
							
							* Remove read_encoding_freqs from util.py  
						
						
						
					 
					
						2015-07-23 01:17:32 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							d5255aad77 
							
						 
					 
					
						
						
							
							* Update freqs for missing tags in ner, for serializer  
						
						
						
					 
					
						2015-07-23 01:17:11 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							12699a1152 
							
						 
					 
					
						
						
							
							* Set initial freqs, to avoid missing values in serializer  
						
						
						
					 
					
						2015-07-23 01:16:27 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							680bb47b55 
							
						 
					 
					
						
						
							
							* Write serializer freqs to single file, vocab/serializer.json  
						
						
						
					 
					
						2015-07-23 01:15:25 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a0e36e8efc 
							
						 
					 
					
						
						
							
							* Add working to/from bytes API to Doc  
						
						
						
					 
					
						2015-07-23 01:14:45 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							1f31d96bf9 
							
						 
					 
					
						
						
							
							* Fix Packer API, so that it reads and writes bytes strings, instead of BitArray. Docs are always byte aligned anyway.  
						
						
						
					 
					
						2015-07-23 01:13:02 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							38ef986b29 
							
						 
					 
					
						
						
							
							* Update spacy/en/attrs.pxd  
						
						
						
					 
					
						2015-07-23 01:10:58 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							06eac32610 
							
						 
					 
					
						
						
							
							* Add cfile.pyx  
						
						
						
					 
					
						2015-07-23 01:10:36 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							0c507bd80a 
							
						 
					 
					
						
						
							
							* Fix tokenizer  
						
						
						
					 
					
						2015-07-22 14:10:30 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c86dbe4944 
							
						 
					 
					
						
						
							
							* Update English.save_models for new Packer save/load stuff  
						
						
						
					 
					
						2015-07-22 13:40:23 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							bf77bcd6b9 
							
						 
					 
					
						
						
							
							* Add comment explaining hash_string  
						
						
						
					 
					
						2015-07-22 13:39:42 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							815bda201d 
							
						 
					 
					
						
						
							
							* Remove UniStr struct  
						
						
						
					 
					
						2015-07-22 13:39:17 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2fc66e3723 
							
						 
					 
					
						
						
							
							* Use Py_UNICODE in tokenizer for now, while we sort out Py_UCS4 stuff  
						
						
						
					 
					
						2015-07-22 13:38:45 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4d61239eac 
							
						 
					 
					
						
						
							
							* Reorganize the serialization functions on Doc  
						
						
						
					 
					
						2015-07-22 04:53:01 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							109106a949 
							
						 
					 
					
						
						
							
							* Replace UniStr, using unicode objects instead  
						
						
						
					 
					
						2015-07-22 04:52:05 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							424854028f 
							
						 
					 
					
						
						
							
							* Fix decode_int32  
						
						
						
					 
					
						2015-07-21 20:09:59 +00:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							304d0e2633 
							
						 
					 
					
						
						
							
							* Use decode_int32 in _orth_decode  
						
						
						
					 
					
						2015-07-21 20:40:55 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9cfa59ec33 
							
						 
					 
					
						
						
							
							* Optimistically try orth encoding, with char as a back-off  
						
						
						
					 
					
						2015-07-21 20:22:45 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c8b89e37a5 
							
						 
					 
					
						
						
							
							* Bug fix to faster huffman decoding  
						
						
						
					 
					
						2015-07-21 20:05:53 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b166d1d2a2 
							
						 
					 
					
						
						
							
							* Use encode32 and decode32  
						
						
						
					 
					
						2015-07-21 19:59:06 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c6cd0ddce8 
							
						 
					 
					
						
						
							
							* Add faster encode_int32 and decode_int32 methods  
						
						
						
					 
					
						2015-07-21 19:58:45 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							dd60594f41 
							
						 
					 
					
						
						
							
							* Fix double encoding error in strings.pyx  
						
						
						
					 
					
						2015-07-20 13:52:56 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							06639dc497 
							
						 
					 
					
						
						
							
							* Add length cap to word shape feature  
						
						
						
					 
					
						2015-07-20 12:06:59 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							128b6d9714 
							
						 
					 
					
						
						
							
							* Move Utf8Str struct to strings module, as that's the only place it's relevant  
						
						
						
					 
					
						2015-07-20 12:06:41 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							01a97b90f3 
							
						 
					 
					
						
						
							
							* Fix header for string store  
						
						
						
					 
					
						2015-07-20 12:06:10 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							52d538ea42 
							
						 
					 
					
						
						
							
							* Fix short string optimization in strings.pyx. StringStore tests now all pass.  
						
						
						
					 
					
						2015-07-20 12:05:23 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							09a3055630 
							
						 
					 
					
						
						
							
							* Work on short string optimization in Utf8Str  
						
						
						
					 
					
						2015-07-20 11:26:46 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							bb0ba1f0cd 
							
						 
					 
					
						
						
							
							* Improve serialization speed  
						
						
						
					 
					
						2015-07-20 03:27:59 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8743a8c084 
							
						 
					 
					
						
						
							
							* Update Doc serialization for new Packer interface  
						
						
						
					 
					
						2015-07-20 01:38:04 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							1f7170e0e1 
							
						 
					 
					
						
						
							
							* Reinstate the fixed vocabulary --- words are only added to the lexicon in init_model, after that we create LexemeC structs with the Pool given to us.  
						
						
						
					 
					
						2015-07-20 01:37:34 +02:00