Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							f4662e9218 
							
						 
					 
					
						
						
							
							Fix vector linkage for token  
						
						
						
					 
					
						2017-06-04 14:19:58 -05:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							498ad85309 
							
						 
					 
					
						
						
							
							Try using tensor for vector/similarity methods  
						
						
						
					 
					
						2017-05-30 23:35:17 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							fe11564b8e 
							
						 
					 
					
						
						
							
							Finish stringstore change. Also xfail vectors tests  
						
						
						
					 
					
						2017-05-28 15:10:22 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							2445707f3c 
							
						 
					 
					
						
						
							
							Re-delegate vectors to vocab  
						
						
						
					 
					
						2017-05-28 11:46:10 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							01e59e4e6e 
							
						 
					 
					
						
						
							
							* Add Token.sent_start property, re Issue  #235  
						
						
						
					 
					
						2017-05-23 18:41:11 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							7ed8a92ed1 
							
						 
					 
					
						
						
							
							Update docstrings and API docs for Token  
						
						
						
					 
					
						2017-05-20 15:13:33 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							a804045597 
							
						 
					 
					
						
						
							
							Use is_ancestor instead of deprecated is_ancestor_of  
						
						
						
					 
					
						2017-05-19 20:23:40 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							e9e62b01b0 
							
						 
					 
					
						
						
							
							Update docstrings and API docs for Token  
						
						
						
					 
					
						2017-05-19 18:47:56 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							9d85cda8e4 
							
						 
					 
					
						
						
							
							Fix models error message and use about.__docs_models__ (see  #1051 )  
						
						
						
					 
					
						2017-05-13 13:05:47 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							6b942763f0 
							
						 
					 
					
						
						
							
							Tidy up imports  
						
						
						
					 
					
						2017-05-13 13:04:40 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6a4221a6de 
							
						 
					 
					
						
						
							
							Allow lemma to be set from Python. Re  #973  
						
						
						
					 
					
						2017-04-16 18:07:53 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							0739ae7b76 
							
						 
					 
					
						
						
							
							Tidy up and fix formatting and imports  
						
						
						
					 
					
						2017-04-15 13:05:15 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							e71a1f4bd0 
							
						 
					 
					
						
						
							
							Fix download commands in error messages (see  #946 )  
						
						
						
					 
					
						2017-04-01 10:20:57 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							fc3900e5b2 
							
						 
					 
					
						
						
							
							Allow ent_id to be set in Token  
						
						
						
					 
					
						2017-03-31 14:00:14 +02:00 
						 
				 
			
				
					
						
							
							
								ines 
							
						 
					 
					
						
						
						
						
							
						
						
							66c1f194f9 
							
						 
					 
					
						
						
							
							Use consistent unicode declarations  
						
						
						
					 
					
						2017-03-12 13:07:28 +01:00 
						 
				 
			
				
					
						
							
							
								Roman Inflianskas 
							
						 
					 
					
						
						
						
						
							
						
						
							66e1109b53 
							
						 
					 
					
						
						
							
							Add support for Universal Dependencies v2.0  
						
						
						
					 
					
						2017-03-03 13:17:34 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							e7f8e13cf3 
							
						 
					 
					
						
						
							
							Make Token hashable.  Fixes   #743  
						
						
						
					 
					
						2017-01-16 13:27:57 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							12cd27b821 
							
						 
					 
					
						
						
							
							Amend 8ae8b443f: Handle comparison with None tokens.  
						
						
						
					 
					
						2017-01-11 13:03:32 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8ae8b443f1 
							
						 
					 
					
						
						
							
							Add richcmp method to Token.  Closes   #631  
						
						
						
					 
					
						2017-01-09 19:30:31 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							404019ad2f 
							
						 
					 
					
						
						
							
							Fix issue  #672 : ent_iob_ was a string, not unicode, due to missing unicode_literals statement.  
						
						
						
					 
					
						2016-12-18 22:33:53 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							293c79c09a 
							
						 
					 
					
						
						
							
							Fix   #595 : Lemmatization was incorrect for base forms, because morphological analyser wasn't adding morphology properly.  
						
						
						
					 
					
						2016-11-04 00:29:07 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							05a8b752a2 
							
						 
					 
					
						
						
							
							Fix Issue  #600 : Missing setters for Token attribute.  
						
						
						
					 
					
						2016-11-02 23:28:59 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							11664b9f20 
							
						 
					 
					
						
						
							
							Fix variable error in token  
						
						
						
					 
					
						2016-11-01 13:28:00 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b86f8af0c1 
							
						 
					 
					
						
						
							
							Fix doc strings  
						
						
						
					 
					
						2016-11-01 12:25:36 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5d5742b773 
							
						 
					 
					
						
						
							
							Add sentiment field to doc, rename getters_for_tokens and getters_for_spans, add user_hooks field to Doc.  
						
						
						
					 
					
						2016-10-19 20:54:22 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							7fd98fc91c 
							
						 
					 
					
						
						
							
							Remove deprecation shim around str/bytes in Token.  
						
						
						
					 
					
						2016-10-17 14:02:47 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c1abc8f6ed 
							
						 
					 
					
						
						
							
							Fix deprecation stuff in Token: Remove the shim for the str/unicode semantics, and raise for has_repvec and repvec  
						
						
						
					 
					
						2016-10-17 11:18:41 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							5d10e2005c 
							
						 
					 
					
						
						
							
							Defer some attributes to Doc, via getters_for_tokens attribute.  
						
						
						
					 
					
						2016-10-17 02:44:49 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ca32a1ab01 
							
						 
					 
					
						
						
							
							Revert "Work on Issue  #285 : intern strings into document-specific pools, to address streaming data memory growth. StringStore.__getitem__ now raises KeyError when it can't find the string. Use StringStore.intern() to get the old behaviour. Still need to hunt down all uses of StringStore.__getitem__ in library and do testing, but logic looks good."  
						
						... 
						
						
						
						This reverts commit 8423e8627f 
						
					 
					
						2016-09-30 20:20:22 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6736977d82 
							
						 
					 
					
						
						
							
							Revert "Changes to Doc and Token for new string store scheme"  
						
						... 
						
						
						
						This reverts commit 99de44d864 
						
					 
					
						2016-09-30 20:11:15 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							99de44d864 
							
						 
					 
					
						
						
							
							Changes to Doc and Token for new string store scheme  
						
						
						
					 
					
						2016-09-30 20:00:21 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							8423e8627f 
							
						 
					 
					
						
						
							
							Work on Issue  #285 : intern strings into document-specific pools, to address streaming data memory growth. StringStore.__getitem__ now raises KeyError when it can't find the string. Use StringStore.intern() to get the old behaviour. Still need to hunt down all uses of StringStore.__getitem__ in library and do testing, but logic looks good.  
						
						
						
					 
					
						2016-09-30 10:14:47 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							4de13606fd 
							
						 
					 
					
						
						
							
							Fix token.pyx  
						
						
						
					 
					
						2016-09-23 15:07:07 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							b4de419e19 
							
						 
					 
					
						
						
							
							Import hash_t typedef in token.pyx  
						
						
						
					 
					
						2016-09-23 14:22:06 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							c1a2e96604 
							
						 
					 
					
						
						
							
							Clean up notes at end of token.pyx  
						
						
						
					 
					
						2016-09-21 20:45:51 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							58e83fe34b 
							
						 
					 
					
						
						
							
							Initial, limited support for quantified patterns in Matcher, and tracking of ent_id attribute in Token and Span. The quantifiers need a lot more testing, and there are some known problems. The main known problem is that the zero-plus and one-plus quantifiers won't work if a token can match both the quantified pattern expression AND the tail of the match.  
						
						
						
					 
					
						2016-09-21 14:54:55 +02:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							6df3858dbc 
							
						 
					 
					
						
						
							
							* Fix Issue  #323 : Incorrect semantics of Token.__str__ built-in. Add flag to allow users to switch the old semantics back on, to ease transition.  
						
						
						
					 
					
						2016-04-12 13:17:59 +10:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							872695759d 
							
						 
					 
					
						
						
							
							Merge pull request  #306  from wbwseeker/german_noun_chunks  
						
						... 
						
						
						
						add German noun chunk functionality 
						
					 
					
						2016-04-08 00:54:24 +10:00 
						 
				 
			
				
					
						
							
							
								Wolfgang Seeker 
							
						 
					 
					
						
						
						
						
							
						
						
							d65ef41d08 
							
						 
					 
					
						
						
							
							make error messages language independent  
						
						
						
					 
					
						2016-03-24 11:47:09 +01:00 
						 
				 
			
				
					
						
							
							
								Wolfgang Seeker 
							
						 
					 
					
						
						
						
						
							
						
						
							5080077097 
							
						 
					 
					
						
						
							
							revert init_model.py back to pre-german state (because it makes more sense)  
						
						... 
						
						
						
						simplify token.n_rights and token.n_lefts 
						
					 
					
						2016-03-21 16:10:25 +01:00 
						 
				 
			
				
					
						
							
							
								Wolfgang Seeker 
							
						 
					 
					
						
						
						
						
							
						
						
							2ae253ef5b 
							
						 
					 
					
						
						
							
							changed head.__set__ to make it simpler  
						
						
						
					 
					
						2016-03-14 13:43:48 +01:00 
						 
				 
			
				
					
						
							
							
								Wolfgang Seeker 
							
						 
					 
					
						
						
						
						
							
						
						
							46e3f979f1 
							
						 
					 
					
						
						
							
							add function for setting head and label to token  
						
						... 
						
						
						
						change PseudoProjectivity.deprojectivize to use these functions 
						
					 
					
						2016-03-11 17:31:06 +01:00 
						 
				 
			
				
					
						
							
							
								Wolfgang Seeker 
							
						 
					 
					
						
						
						
						
							
						
						
							03fb498dbe 
							
						 
					 
					
						
						
							
							introduce lang field for LexemeC to hold language id  
						
						... 
						
						
						
						put noun_chunk logic into iterators.py for each language separately 
						
					 
					
						2016-03-10 13:01:34 +01:00 
						 
				 
			
				
					
						
							
							
								Wolfgang Seeker 
							
						 
					 
					
						
						
						
						
							
						
						
							3448cb40a4 
							
						 
					 
					
						
						
							
							integrated pseudo-projective parsing into parser  
						
						... 
						
						
						
						- nonproj.pyx holds a class PseudoProjectivity which currently holds
  all functionality to implement Nivre & Nilsson 2005's pseudo-projective
  parsing using the HEAD decoration scheme
- changed lefts/rights in Token to account for possible non-projective
  structures 
						
					 
					
						2016-03-01 10:09:08 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							419edfab50 
							
						 
					 
					
						
						
							
							* Use generic flags for the new attributes until they're added  
						
						
						
					 
					
						2016-02-04 15:50:54 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							11810be33e 
							
						 
					 
					
						
						
							
							* Add Python hooks for is_bracket/is_quote/is_left_punct/is_right_punct  
						
						
						
					 
					
						2016-02-04 13:04:16 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							995b2d18fd 
							
						 
					 
					
						
						
							
							* Route token.string via token.txt_with_ws, to deprecate token.string in future  
						
						
						
					 
					
						2016-01-16 17:14:34 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							03e8a4293d 
							
						 
					 
					
						
						
							
							* Add loop guard to Token.lefts and Token.rights properties  
						
						
						
					 
					
						2016-01-16 16:18:17 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							ab5aac5b2f 
							
						 
					 
					
						
						
							
							* Add .rank property to Token and Lexeme, for frequency rank  
						
						
						
					 
					
						2015-11-08 16:18:25 +01:00 
						 
				 
			
				
					
						
							
							
								Matthew Honnibal 
							
						 
					 
					
						
						
						
						
							
						
						
							68f479e821 
							
						 
					 
					
						
						
							
							* Rename Doc.data to Doc.c  
						
						
						
					 
					
						2015-11-04 00:15:14 +11:00