Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							d4407d8e2f 
							
						 
					 
					
						
						
							
							Py3 compatibility tweak  
						
						
						
					 
					
						2015-07-23 09:45:15 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8d7e044e2c 
							
						 
					 
					
						
						
							
							* Add clusters file  
						
						
						
					 
					
						2015-07-23 09:35:56 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5b41744270 
							
						 
					 
					
						
						
							
							* Check for directory presence before loading annotators  
						
						
						
					 
					
						2015-07-23 09:27:37 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							da4821fc14 
							
						 
					 
					
						
						
							
							* Add cluster words to probs in init_model  
						
						
						
					 
					
						2015-07-23 09:27:07 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							bee2e77983 
							
						 
					 
					
						
						
							
							* Update tests  
						
						
						
					 
					
						2015-07-23 09:26:43 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4af2595d99 
							
						 
					 
					
						
						
							
							* Fix structure of wordnet directory for init_model  
						
						
						
					 
					
						2015-07-23 06:35:38 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							16617142b7 
							
						 
					 
					
						
						
							
							* Fix travis.yml  
						
						
						
					 
					
						2015-07-23 06:15:01 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							24b98bc1c8 
							
						 
					 
					
						
						
							
							* Fix travis.yml  
						
						
						
					 
					
						2015-07-23 06:06:13 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							1364d9c74e 
							
						 
					 
					
						
						
							
							* Fix travis.yml  
						
						
						
					 
					
						2015-07-23 05:58:26 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2c1aae3cfb 
							
						 
					 
					
						
						
							
							* Fix travis.yml  
						
						
						
					 
					
						2015-07-23 05:30:33 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							16d0e1f598 
							
						 
					 
					
						
						
							
							* Fix travis.yml  
						
						
						
					 
					
						2015-07-23 04:41:28 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							0c3caae085 
							
						 
					 
					
						
						
							
							* Fix travis.yml  
						
						
						
					 
					
						2015-07-23 04:13:54 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							58e6adfacd 
							
						 
					 
					
						
						
							
							* Fix travis.yml  
						
						
						
					 
					
						2015-07-23 04:05:51 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c4018d6827 
							
						 
					 
					
						
						
							
							* Fix travis.yml  
						
						
						
					 
					
						2015-07-23 03:34:23 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2fb4e932a3 
							
						 
					 
					
						
						
							
							* Fix travis.yml  
						
						
						
					 
					
						2015-07-23 03:16:49 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							fc0e964a5c 
							
						 
					 
					
						
						
							
							* Fix travis.yml  
						
						
						
					 
					
						2015-07-23 03:00:17 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							af5bab55b3 
							
						 
					 
					
						
						
							
							* Update travis.yml  
						
						
						
					 
					
						2015-07-23 02:52:06 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							082ae5aaf7 
							
						 
					 
					
						
						
							
							* Fix travis.yml  
						
						
						
					 
					
						2015-07-23 02:38:14 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							247e592c2c 
							
						 
					 
					
						
						
							
							* Update travis.yml  
						
						
						
					 
					
						2015-07-23 02:32:47 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							83c0f0da22 
							
						 
					 
					
						
						
							
							* Remove lemmatizer from init_model  
						
						
						
					 
					
						2015-07-23 02:32:34 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							df01a88763 
							
						 
					 
					
						
						
							
							Merge branch 'refactor' (and serializaton)  
						
						... 
						
						
						
						Add Huffman-code serialization, and do a lot of
refactoring. Highlights include:
* Much more efficient StringStore
* Vocab maintains a by-orth mapping of Lexemes
* Avoid manually slicing Py_UNICODE buffers,
  simplifying tokenizer and vocab C APIs
* Remove various bits of dead code
* Work on removing GIL around parser
* Work on bridge to Theano
Conflicts:
	spacy/strings.pxd
	spacy/strings.pyx
	spacy/structs.pxd 
						
					 
					
						2015-07-23 02:18:35 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4970ebe887 
							
						 
					 
					
						
						
							
							* Add test_io  
						
						
						
					 
					
						2015-07-23 01:19:59 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4729200dfc 
							
						 
					 
					
						
						
							
							* Whitespace  
						
						
						
					 
					
						2015-07-23 01:19:26 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							3a5299faec 
							
						 
					 
					
						
						
							
							* Upd tests  
						
						
						
					 
					
						2015-07-23 01:19:11 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							20c2db08b7 
							
						 
					 
					
						
						
							
							* Add flags to pytest to tests requiring models, vectors or slow functions to be toggled.  
						
						
						
					 
					
						2015-07-23 01:19:03 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a7c4d72e83 
							
						 
					 
					
						
						
							
							* Add serializer property to Vocab, and lazy-load it. Add get_by_orth method.  
						
						
						
					 
					
						2015-07-23 01:18:19 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6ab1696b15 
							
						 
					 
					
						
						
							
							* Remove read_encoding_freqs from util.py  
						
						
						
					 
					
						2015-07-23 01:17:32 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							d5255aad77 
							
						 
					 
					
						
						
							
							* Update freqs for missing tags in ner, for serializer  
						
						
						
					 
					
						2015-07-23 01:17:11 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							12699a1152 
							
						 
					 
					
						
						
							
							* Set initial freqs, to avoid missing values in serializer  
						
						
						
					 
					
						2015-07-23 01:16:27 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							680bb47b55 
							
						 
					 
					
						
						
							
							* Write serializer freqs to single file, vocab/serializer.json  
						
						
						
					 
					
						2015-07-23 01:15:25 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							a0e36e8efc 
							
						 
					 
					
						
						
							
							* Add working to/from bytes API to Doc  
						
						
						
					 
					
						2015-07-23 01:14:45 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							1f31d96bf9 
							
						 
					 
					
						
						
							
							* Fix Packer API, so that it reads and writes bytes strings, instead of BitArray. Docs are always byte aligned anyway.  
						
						
						
					 
					
						2015-07-23 01:13:02 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							38ef986b29 
							
						 
					 
					
						
						
							
							* Update spacy/en/attrs.pxd  
						
						
						
					 
					
						2015-07-23 01:10:58 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							06eac32610 
							
						 
					 
					
						
						
							
							* Add cfile.pyx  
						
						
						
					 
					
						2015-07-23 01:10:36 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2b7bd46508 
							
						 
					 
					
						
						
							
							* Update get_freqs script  
						
						
						
					 
					
						2015-07-22 15:43:06 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							0c507bd80a 
							
						 
					 
					
						
						
							
							* Fix tokenizer  
						
						
						
					 
					
						2015-07-22 14:10:30 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c86dbe4944 
							
						 
					 
					
						
						
							
							* Update English.save_models for new Packer save/load stuff  
						
						
						
					 
					
						2015-07-22 13:40:23 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							bf77bcd6b9 
							
						 
					 
					
						
						
							
							* Add comment explaining hash_string  
						
						
						
					 
					
						2015-07-22 13:39:42 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							815bda201d 
							
						 
					 
					
						
						
							
							* Remove UniStr struct  
						
						
						
					 
					
						2015-07-22 13:39:17 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2fc66e3723 
							
						 
					 
					
						
						
							
							* Use Py_UNICODE in tokenizer for now, while sort out Py_UCS4 stuff  
						
						
						
					 
					
						2015-07-22 13:38:45 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4d61239eac 
							
						 
					 
					
						
						
							
							* Reorganize the serialization functions on Doc  
						
						
						
					 
					
						2015-07-22 04:53:01 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							109106a949 
							
						 
					 
					
						
						
							
							* Replace UniStr, using unicode objects instead  
						
						
						
					 
					
						2015-07-22 04:52:05 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							386246db5b 
							
						 
					 
					
						
						
							
							* Update init_model, making language resources optional  
						
						
						
					 
					
						2015-07-22 00:25:14 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							424854028f 
							
						 
					 
					
						
						
							
							* Fix decode_int32  
						
						
						
					 
					
						2015-07-21 20:09:59 +00:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							304d0e2633 
							
						 
					 
					
						
						
							
							* Use decode_int32 in _orth_decode  
						
						
						
					 
					
						2015-07-21 20:40:55 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							9cfa59ec33 
							
						 
					 
					
						
						
							
							* Optimistically try orth encoding, with char as a back-off  
						
						
						
					 
					
						2015-07-21 20:22:45 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c8b89e37a5 
							
						 
					 
					
						
						
							
							* Bug fix to faster huffman decoding  
						
						
						
					 
					
						2015-07-21 20:05:53 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b166d1d2a2 
							
						 
					 
					
						
						
							
							* Use encode32 and decode32  
						
						
						
					 
					
						2015-07-21 19:59:06 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c6cd0ddce8 
							
						 
					 
					
						
						
							
							* Add faster encode_int32 and decode_int32 methods  
						
						
						
					 
					
						2015-07-21 19:58:45 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							dd60594f41 
							
						 
					 
					
						
						
							
							* Fix double encoding error in strings.pyx  
						
						
						
					 
					
						2015-07-20 13:52:56 +02:00