Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							6b586cdad4
							
						
					 | 
					
						
						
							
							* Change lexemes.bin format. Add a header specifying size of LexemeC and number of lexemes, and don't have the redundant orth information.
						
						
						
						
						
					 | 
					
						2015-07-27 08:31:51 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							8e4c69ee8c
							
						
					 | 
					
						
						
							
							* Add is_oov property, and fix up handling of attributes
						
						
						
						
						
					 | 
					
						2015-07-27 01:50:06 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							fc268f03eb
							
						
					 | 
					
						
						
							
							* Assert against null pointer exceptions in vocab
						
						
						
						
						
					 | 
					
						2015-07-27 01:00:10 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							0f093fdb30
							
						
					 | 
					
						
						
							
							* Fix get_by_orth for py3
						
						
						
						
						
					 | 
					
						2015-07-26 19:26:41 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							ceeda5a739
							
						
					 | 
					
						
						
							
							* Fix get_by_orth for py3
						
						
						
						
						
					 | 
					
						2015-07-26 18:39:27 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							6bb96c122d
							
						
					 | 
					
						
						
							
							* Host IS_ flags in attrs.pxd, and add properties for them on Token and Lexeme objects
						
						
						
						
						
					 | 
					
						2015-07-26 16:37:16 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							7eb2446082
							
						
					 | 
					
						
						
							
							* Return empty lexeme on empty string
						
						
						
						
						
					 | 
					
						2015-07-26 00:18:30 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							fd525f0675
							
						
					 | 
					
						
						
							
							* Pass OOV probability around
						
						
						
						
						
					 | 
					
						2015-07-25 23:29:51 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							22028602a9
							
						
					 | 
					
						
						
							
							* Add unicode_literals declaration in vocab.pyx
						
						
						
						
						
					 | 
					
						2015-07-23 13:24:20 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							a7c4d72e83
							
						
					 | 
					
						
						
							
							* Add serializer property to Vocab, and lazy-load it. Add get_by_orth method.
						
						
						
						
						
					 | 
					
						2015-07-23 01:18:19 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							109106a949
							
						
					 | 
					
						
						
							
							* Replace UniStr, using unicode objects instead
						
						
						
						
						
					 | 
					
						2015-07-22 04:52:05 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							1f7170e0e1
							
						
					 | 
					
						
						
							
							* Reinstate the fixed vocabulary --- words are only added to the lexicon in init_model, after that we create LexemeC structs with the Pool given to us.
						
						
						
						
						
					 | 
					
						2015-07-20 01:37:34 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							317cbbc015
							
						
					 | 
					
						
						
							
							* Serialization round trip now working with decent API, but with rough spots in the organisation and requiring vocabulary to be fixed ahead of time.
						
						
						
						
						
					 | 
					
						2015-07-19 15:18:17 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							82d84b0f2b
							
						
					 | 
					
						
						
							
							* Index lexemes by orth, instead of a lexemes vector. Breaks the mechanism for deciding not to own LexemeC structs during parsing. Need to reinstate this.
						
						
						
						
						
					 | 
					
						2015-07-18 22:42:15 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							c2c83120d4
							
						
					 | 
					
						
						
							
							* Remove codec property from Vocab
						
						
						
						
						
					 | 
					
						2015-07-17 16:40:11 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							db9dfd2e23
							
						
					 | 
					
						
						
							
							* Major refactor of serialization. Nearly complete now.
						
						
						
						
						
					 | 
					
						2015-07-17 01:27:54 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							2a5d050134
							
						
					 | 
					
						
						
							
							* Give codec loading back to Vocab.
						
						
						
						
						
					 | 
					
						2015-07-16 17:45:42 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							b59d271510
							
						
					 | 
					
						
						
							
							* Move serialization functionality into Serializer class
						
						
						
						
						
					 | 
					
						2015-07-16 11:23:48 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							af5cc926a4
							
						
					 | 
					
						
						
							
							* Add codec property to Vocab, to use the Huffman encoding
						
						
						
						
						
					 | 
					
						2015-07-13 13:55:14 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							abc43b852d
							
						
					 | 
					
						
						
							
							* Add pos_tags attr to Vocab.
						
						
						
						
						
					 | 
					
						2015-07-08 12:36:38 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							c04e6ebca6
							
						
					 | 
					
						
						
							
							* Allow user to load different sized vectors.
						
						
						
						
						
					 | 
					
						2015-06-05 16:26:39 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							adeb57cb1e
							
						
					 | 
					
						
						
							
							* Fix long line
						
						
						
						
						
					 | 
					
						2015-06-01 23:07:00 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							eba7b34f66
							
						
					 | 
					
						
						
							
							* Add flag to disable loading of word vectors
						
						
						
						
						
					 | 
					
						2015-05-25 01:02:42 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							e73eaf2d05
							
						
					 | 
					
						
						
							
							* Replace some assertions with proper errors
						
						
						
						
						
					 | 
					
						2015-05-08 16:52:17 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jordan Suchow
							
						 
					 | 
					
						
						
						
						
							
						
						
							3a8d9b37a6
							
						
					 | 
					
						
						
							
							Remove trailing whitespace
						
						
						
						
						
					 | 
					
						2015-04-19 13:01:38 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							f0e0588833
							
						
					 | 
					
						
						
							
							* Fill L2 norm attribute on LexemeC struct
						
						
						
						
						
					 | 
					
						2015-02-07 08:44:42 -05:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							76d9394cb4
							
						
					 | 
					
						
						
							
							* Fix vocab.pyx for Python3
						
						
						
						
						
					 | 
					
						2015-02-01 13:14:04 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							ce3ae8b5d9
							
						
					 | 
					
						
						
							
							* Fix platform-specific lexicon bug.
						
						
						
						
						
					 | 
					
						2015-01-31 16:38:58 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							d4a493855e
							
						
					 | 
					
						
						
							
							* Fix error msg
						
						
						
						
						
					 | 
					
						2015-01-25 23:01:30 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							c1c3dba4cb
							
						
					 | 
					
						
						
							
							* Check whether vector files are present before trying to load them.
						
						
						
						
						
					 | 
					
						2015-01-25 18:16:48 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							fda94271af
							
						
					 | 
					
						
						
							
							* Rename NORM1 and NORM2 attrs to lower and norm
						
						
						
						
						
					 | 
					
						2015-01-24 06:17:03 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							d460c28838
							
						
					 | 
					
						
						
							
							* Rename vec to repvec
						
						
						
						
						
					 | 
					
						2015-01-22 02:06:22 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							6c7e44140b
							
						
					 | 
					
						
						
							
							* Work on word vectors, and other stuff
						
						
						
						
						
					 | 
					
						2015-01-17 16:21:17 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							7d3c40de7d
							
						
					 | 
					
						
						
							
							* Tests passing after refactor. API has obvious warts, particularly in Token and Lexeme
						
						
						
						
						
					 | 
					
						2015-01-15 00:33:16 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							0930892fc1
							
						
					 | 
					
						
						
							
							* Tmp. Working on refactor. Compiles, must hook up lexical feats.
						
						
						
						
						
					 | 
					
						2015-01-14 00:03:48 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							46da3d74d2
							
						
					 | 
					
						
						
							
							* Tmp. Refactoring, introducing a Lexeme PyObject.
						
						
						
						
						
					 | 
					
						2015-01-12 11:23:44 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							ce2edd6312
							
						
					 | 
					
						
						
							
							* Tmp commit. Refactoring to create a Python Lexeme class.
						
						
						
						
						
					 | 
					
						2015-01-12 10:26:22 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							a58920cc5e
							
						
					 | 
					
						
						
							
							* Import orth.word_shape as a C module
						
						
						
						
						
					 | 
					
						2015-01-06 03:18:22 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							f5d41028b5
							
						
					 | 
					
						
						
							
							* Move around data files for test release
						
						
						
						
						
					 | 
					
						2015-01-03 01:59:22 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							bb80937544
							
						
					 | 
					
						
						
							
							* Upd docstrings
						
						
						
						
						
					 | 
					
						2014-12-27 18:45:16 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							b8b65903fc
							
						
					 | 
					
						
						
							
							* Tmp
						
						
						
						
						
					 | 
					
						2014-12-24 17:42:00 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							73f200436f
							
						
					 | 
					
						
						
							
							* Tests passing except for morphology/lemmatization stuff
						
						
						
						
						
					 | 
					
						2014-12-23 11:40:32 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							2a89d70429
							
						
					 | 
					
						
						
							
							* Add vocab.pyx to setup, and ensure we can import spacy.en.lang
						
						
						
						
						
					 | 
					
						2014-12-21 06:03:53 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							e1c1a4b868
							
						
					 | 
					
						
						
							
							* Tmp
						
						
						
						
						
					 | 
					
						2014-12-21 05:36:29 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							d11c1edf8c
							
						
					 | 
					
						
						
							
							* Import slice_unicode from strings.pyx
						
						
						
						
						
					 | 
					
						2014-12-20 07:56:26 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							116f7f3bc1
							
						
					 | 
					
						
						
							
							* Rename Lexicon to Vocab, and move it to its own file
						
						
						
						
						
					 | 
					
						2014-12-20 06:54:03 +11:00 | 
					
					
						
						
							
							
							
						
					 |