Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							ceeda5a739
							
						
					 | 
					
						
						
							
							* Fix get_by_orth for py3
						
						
						
						
						
					 | 
					
						2015-07-26 18:39:27 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							5c9b8d05e4
							
						
					 | 
					
						
						
							
							* Upd test_docs
						
						
						
						
						
					 | 
					
						2015-07-26 17:41:13 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							609f729cc5
							
						
					 | 
					
						
						
							
							* Fix infix test
						
						
						
						
						
					 | 
					
						2015-07-26 17:32:55 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							3cfe3d8c1c
							
						
					 | 
					
						
						
							
							* Revert bad infix change
						
						
						
						
						
					 | 
					
						2015-07-26 17:32:37 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							460b4c3207
							
						
					 | 
					
						
						
							
							* Add more infix tests
						
						
						
						
						
					 | 
					
						2015-07-26 17:30:34 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							bd608559bc
							
						
					 | 
					
						
						
							
							* Fix infix-period tokenization
						
						
						
						
						
					 | 
					
						2015-07-26 17:14:52 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							94f314c271
							
						
					 | 
					
						
						
							
							* Fix tokenization of email addresses.
						
						
						
						
						
					 | 
					
						2015-07-26 16:38:08 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							48a4d15264
							
						
					 | 
					
						
						
							
							* Test token properties
						
						
						
						
						
					 | 
					
						2015-07-26 16:37:39 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							6bb96c122d
							
						
					 | 
					
						
						
							
							* Host IS_ flags in attrs.pxd, and add properties for them on Token and Lexeme objects
						
						
						
						
						
					 | 
					
						2015-07-26 16:37:16 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							eeaea25f0c
							
						
					 | 
					
						
						
							
							* Check oov_prob file is present
						
						
						
						
						
					 | 
					
						2015-07-26 16:36:38 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							847c08e411
							
						
					 | 
					
						
						
							
							* Unhack serialization api tests
						
						
						
						
						
					 | 
					
						2015-07-26 16:23:41 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							c4f20847da
							
						
					 | 
					
						
						
							
							* Fix init_model for travis tests
						
						
						
						
						
					 | 
					
						2015-07-26 14:03:30 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							09312b9353
							
						
					 | 
					
						
						
							
							* Fix init_model for travis tests
						
						
						
						
						
					 | 
					
						2015-07-26 13:55:47 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							3a4c2a3276
							
						
					 | 
					
						
						
							
							* Update doctests
						
						
						
						
						
					 | 
					
						2015-07-26 13:04:18 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							2b2032d1a0
							
						
					 | 
					
						
						
							
							* Update doctests
						
						
						
						
						
					 | 
					
						2015-07-26 12:57:59 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							90ad717dc4
							
						
					 | 
					
						
						
							
							* Update default freq thresholds in init_model
						
						
						
						
						
					 | 
					
						2015-07-26 01:41:17 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							6c01e01f12
							
						
					 | 
					
						
						
							
							* Fix some casing problems in specials.json
						
						
						
						
						
					 | 
					
						2015-07-26 01:38:29 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							6a5e035a48
							
						
					 | 
					
						
						
							
							* Ensure data files are copied for tokenizer in init_model
						
						
						
						
						
					 | 
					
						2015-07-26 01:36:19 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							ab93898ac6
							
						
					 | 
					
						
						
							
							* Make heuristics more explicit in init_model
						
						
						
						
						
					 | 
					
						2015-07-26 00:22:19 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							7eb2446082
							
						
					 | 
					
						
						
							
							* Return empty lexeme on empty string
						
						
						
						
						
					 | 
					
						2015-07-26 00:18:30 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							1b5d1da2a7
							
						
					 | 
					
						
						
							
							* Allow an OOV probability to be specified in get_lex_props
						
						
						
						
						
					 | 
					
						2015-07-26 00:03:43 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							cd6e25132b
							
						
					 | 
					
						
						
							
							* Allow an OOV probability to be specified in get_lex_props
						
						
						
						
						
					 | 
					
						2015-07-26 00:01:46 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							5c04dcd7c1
							
						
					 | 
					
						
						
							
							* Fix init_model
						
						
						
						
						
					 | 
					
						2015-07-25 23:33:02 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							fd525f0675
							
						
					 | 
					
						
						
							
							* Pass OOV probability around
						
						
						
						
						
					 | 
					
						2015-07-25 23:29:51 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							5b6bf4d4a6
							
						
					 | 
					
						
						
							
							* Remove probability cap on lexicon
						
						
						
						
						
					 | 
					
						2015-07-25 23:05:51 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							c62eb110c0
							
						
					 | 
					
						
						
							
							* Fix merge conflict in init_model
						
						
						
						
						
					 | 
					
						2015-07-25 23:04:30 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							0301472d15
							
						
					 | 
					
						
						
							
							* Fix init_model
						
						
						
						
						
					 | 
					
						2015-07-25 22:56:35 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							3fe14b8ed6
							
						
					 | 
					
						
						
							
							* Fix CFile for Python2
						
						
						
						
						
					 | 
					
						2015-07-25 22:55:53 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							8e800adfbc
							
						
					 | 
					
						
						
							
							* Fix init_model
						
						
						
						
						
					 | 
					
						2015-07-25 22:54:08 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							5f183098e4
							
						
					 | 
					
						
						
							
							Merge branch 'master' of ssh://github.com/honnibal/spaCy
						
						
						
						
						
					 | 
					
						2015-07-25 22:37:04 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							65f3ce6c52
							
						
					 | 
					
						
						
							
							* Require preshed 0.41
						
						
						
						
						
					 | 
					
						2015-07-25 22:36:43 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							6076213c16
							
						
					 | 
					
						
						
							
							* Fix init_model script
						
						
						
						
						
					 | 
					
						2015-07-25 22:35:52 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							1a99eb69da
							
						
					 | 
					
						
						
							
							Merge branch 'master' of https://github.com/honnibal/spaCy
						
						
						
						
						
					 | 
					
						2015-07-25 22:19:48 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							ef448649b3
							
						
					 | 
					
						
						
							
							* Add read_freqs function in init_model
						
						
						
						
						
					 | 
					
						2015-07-25 22:16:36 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							2e6a60eaec
							
						
					 | 
					
						
						
							
							Merge branch 'master' of https://github.com/honnibal/spaCy
						
						
						
						
						
					 | 
					
						2015-07-25 21:14:07 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							105305b4aa
							
						
					 | 
					
						
						
							
							* Upd get_freqs script
						
						
						
						
						
					 | 
					
						2015-07-25 21:13:41 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							616445e027
							
						
					 | 
					
						
						
							
							* Add simple script to collate frequencies from sorted file
						
						
						
						
						
					 | 
					
						2015-07-25 21:12:45 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							823ef4a00b
							
						
					 | 
					
						
						
							
							* Remove profile declarations
						
						
						
						
						
					 | 
					
						2015-07-25 18:13:06 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							f4809e562f
							
						
					 | 
					
						
						
							
							* Allow json to be used as a fallback if ujson is not available
						
						
						
						
						
					 | 
					
						2015-07-25 18:11:36 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							9da06671cf
							
						
					 | 
					
						
						
							
							* Remove unused import
						
						
						
						
						
					 | 
					
						2015-07-25 18:11:16 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							2060935cdb
							
						
					 | 
					
						
						
							
							* Remove explicit bytes type in doc.from_bytes, to accept bytearray
						
						
						
						
						
					 | 
					
						2015-07-24 04:54:13 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							aa28e2e01d
							
						
					 | 
					
						
						
							
							* Release the GIL around parse function
						
						
						
						
						
					 | 
					
						2015-07-24 04:53:27 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							287d90e792
							
						
					 | 
					
						
						
							
							* Use thinc 3.3
						
						
						
						
						
					 | 
					
						2015-07-24 04:52:50 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							c52179f5fa
							
						
					 | 
					
						
						
							
							* Use print function in train.py, for py 2/3 compatibility
						
						
						
						
						
					 | 
					
						2015-07-24 04:52:35 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							d62eb34b76
							
						
					 | 
					
						
						
							
							* More Py 2/3 compatibility in bit strings
						
						
						
						
						
					 | 
					
						2015-07-24 04:52:06 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							6d0cdb1630
							
						
					 | 
					
						
						
							
							* Py 2/3 compatibility of serialize tests
						
						
						
						
						
					 | 
					
						2015-07-24 04:51:53 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							0bb839d299
							
						
					 | 
					
						
						
							
							* Fix string coercion for Python 3
						
						
						
						
						
					 | 
					
						2015-07-24 03:49:30 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							c4ff410fdb
							
						
					 | 
					
						
						
							
							* Fix bytes problems for Python3
						
						
						
						
						
					 | 
					
						2015-07-24 03:48:23 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							ce984f471c
							
						
					 | 
					
						
						
							
							* Update tests for python3
						
						
						
						
						
					 | 
					
						2015-07-24 03:47:59 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							1ab25e4dad
							
						
					 | 
					
						
						
							
							* Fix python3 type error
						
						
						
						
						
					 | 
					
						2015-07-24 02:45:34 +02:00 | 
					
					
						
						
							
							
							
						
					 |