Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							78487f3e66
							
						
					 | 
					
						
						
							
							* Update parser oracle for missing heads
						
						
						
						
						
					 | 
					
						2015-05-24 20:05:58 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							acd1245ad4
							
						
					 | 
					
						
						
							
							* Remove cruft from conll.pyx --- unused stuff about evlauation, which now lives in spacy.scorer
						
						
						
						
						
					 | 
					
						2015-05-24 17:35:49 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							20f1d868a3
							
						
					 | 
					
						
						
							
							* Tmp commit. Working on whole document parsing
						
						
						
						
						
					 | 
					
						2015-05-24 02:49:56 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							f2ee9c4feb
							
						
					 | 
					
						
						
							
							* Comment out constituency parsing stuff, so that code compiles
						
						
						
						
						
					 | 
					
						2015-05-20 16:55:05 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							9dfc9c039c
							
						
					 | 
					
						
						
							
							* Work on constituency parsing.
						
						
						
						
						
					 | 
					
						2015-05-20 16:02:51 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							ba07b925a7
							
						
					 | 
					
						
						
							
							* Fix compile error in conll.pyx
						
						
						
						
						
					 | 
					
						2015-05-12 22:33:47 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							f1e0272b18
							
						
					 | 
					
						
						
							
							* Disable c-parsing transitions
						
						
						
						
						
					 | 
					
						2015-05-12 22:33:25 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							03a6626545
							
						
					 | 
					
						
						
							
							* Tmp commit
						
						
						
						
						
					 | 
					
						2015-05-12 20:27:56 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							9568ebed08
							
						
					 | 
					
						
						
							
							* Fix off-by-one in head reading
						
						
						
						
						
					 | 
					
						2015-05-12 20:27:56 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							d2ac8d8007
							
						
					 | 
					
						
						
							
							* Add ctnt field to State, in preparation for constituency parsing
						
						
						
						
						
					 | 
					
						2015-05-12 20:27:56 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							ab67693393
							
						
					 | 
					
						
						
							
							* Add read_json_file to conll.pyx
						
						
						
						
						
					 | 
					
						2015-05-12 20:27:55 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							aff9359a8d
							
						
					 | 
					
						
						
							
							* Update ner.pyx to expect brackets from gold_tuples
						
						
						
						
						
					 | 
					
						2015-05-12 20:27:55 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							53cf77e1c8
							
						
					 | 
					
						
						
							
							* Bug fix: when non-monotonically correct a dependency, make sure to delete the old one from the child list
						
						
						
						
						
					 | 
					
						2015-05-12 20:26:41 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							a4e2af54f9
							
						
					 | 
					
						
						
							
							* Add support for l/r edge to add_dep, and move inlined methods into _state.pyx where possible
						
						
						
						
						
					 | 
					
						2015-05-12 20:26:41 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							fb8d50b3d5
							
						
					 | 
					
						
						
							
							Merge branch 'master' of ssh://github.com/honnibal/spaCy
						
						
						
						
						
					 | 
					
						2015-04-30 12:45:15 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							ed8e8c3bd0
							
						
					 | 
					
						
						
							
							* Whitespace
						
						
						
						
						
					 | 
					
						2015-04-29 14:22:47 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							763ef01575
							
						
					 | 
					
						
						
							
							* Fix two bugs in feature calculation
						
						
						
						
						
					 | 
					
						2015-04-28 23:25:09 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							b3fd48c97b
							
						
					 | 
					
						
						
							
							* Fix missing root labels bug identified in Issue #57
						
						
						
						
						
					 | 
					
						2015-04-28 20:45:51 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Jordan Suchow
							
						 
					 | 
					
						
						
						
						
							
						
						
							3a8d9b37a6
							
						
					 | 
					
						
						
							
							Remove trailing whitespace
						
						
						
						
						
					 | 
					
						2015-04-19 13:01:38 -07:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							99dbf8a38c
							
						
					 | 
					
						
						
							
							* Fix error type in lookup_transition
						
						
						
						
						
					 | 
					
						2015-04-16 01:36:22 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							9f16848b60
							
						
					 | 
					
						
						
							
							* Add (N0w, N1w) unigram pair to NER features, prompted by failure to detect 'this weekend'
						
						
						
						
						
					 | 
					
						2015-04-15 06:01:18 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							507048dc45
							
						
					 | 
					
						
						
							
							* Rename StandardError to Exception, for Python 3 compatibility
						
						
						
						
						
					 | 
					
						2015-04-12 07:28:34 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							1d05e6da00
							
						
					 | 
					
						
						
							
							* Add ne_iob and ne_type features to NER
						
						
						
						
						
					 | 
					
						2015-04-10 19:07:08 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							4df8a3d90f
							
						
					 | 
					
						
						
							
							* Add ne_iob and ne_type attributes to context vector
						
						
						
						
						
					 | 
					
						2015-04-10 05:02:15 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							8c354c432b
							
						
					 | 
					
						
						
							
							* Add ValueError condition to ner_tag reading
						
						
						
						
						
					 | 
					
						2015-04-10 04:59:59 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							435cccf098
							
						
					 | 
					
						
						
							
							* Add read_conll03_file function to conll.pyx
						
						
						
						
						
					 | 
					
						2015-04-10 04:59:11 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							99c9ecfc18
							
						
					 | 
					
						
						
							
							* Fix bug in prefix, suffix and word shape features in parser and NER
						
						
						
						
						
					 | 
					
						2015-04-10 03:53:33 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							5a075ea3fc
							
						
					 | 
					
						
						
							
							* Ensure NER moves are available for single-word tokens
						
						
						
						
						
					 | 
					
						2015-04-05 22:30:58 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							a60a366b2c
							
						
					 | 
					
						
						
							
							* Support 'punct' dep label in conll.pyx
						
						
						
						
						
					 | 
					
						2015-04-05 22:30:19 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							a3af6b7c3d
							
						
					 | 
					
						
						
							
							* Left-Arc from Root, to allow non-monotonic reduce to compete with left-arc when the stack is not empty.
						
						
						
						
						
					 | 
					
						2015-03-27 17:39:16 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							db5a43318c
							
						
					 | 
					
						
						
							
							* Improve print_state debug printer
						
						
						
						
						
					 | 
					
						2015-03-27 17:29:58 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							1705eccbbe
							
						
					 | 
					
						
						
							
							* Remove whitespace
						
						
						
						
						
					 | 
					
						2015-03-27 15:22:39 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							3feb52374c
							
						
					 | 
					
						
						
							
							* Break apart a condition, for ease of debug printing
						
						
						
						
						
					 | 
					
						2015-03-27 15:21:38 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							b32f581acb
							
						
					 | 
					
						
						
							
							* Fix bug in ArcEager.get_labels
						
						
						
						
						
					 | 
					
						2015-03-27 15:21:06 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							1320bd19db
							
						
					 | 
					
						
						
							
							* Move Span class to own file
						
						
						
						
						
					 | 
					
						2015-03-26 16:45:38 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							e854ba0a13
							
						
					 | 
					
						
						
							
							* Remove support for force_gold flag from GreedyParser, since it's not so useful, and it's clutter
						
						
						
						
						
					 | 
					
						2015-03-26 16:44:47 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							6a6085f8b9
							
						
					 | 
					
						
						
							
							* Clean up GreedyParser.train function a bit
						
						
						
						
						
					 | 
					
						2015-03-26 16:44:47 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							b3157927e6
							
						
					 | 
					
						
						
							
							* Clean up unused feature templates
						
						
						
						
						
					 | 
					
						2015-03-26 16:44:47 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							411bf377d4
							
						
					 | 
					
						
						
							
							* Remove dependency on ner_util module
						
						
						
						
						
					 | 
					
						2015-03-26 16:44:47 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							01c892f583
							
						
					 | 
					
						
						
							
							* Add comment to fill_context
						
						
						
						
						
					 | 
					
						2015-03-26 16:44:47 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							2741179aff
							
						
					 | 
					
						
						
							
							* Important bug fix: Fill token N2w, which was being unfilled, after a bad edit while writing the NER features.
						
						
						
						
						
					 | 
					
						2015-03-26 16:44:47 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							71648205d9
							
						
					 | 
					
						
						
							
							* Add support for debug feature set. Just use unigrams for this.
						
						
						
						
						
					 | 
					
						2015-03-26 16:44:47 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							3b70b304b2
							
						
					 | 
					
						
						
							
							* Add words to gold_tuples from gold conll file
						
						
						
						
						
					 | 
					
						2015-03-26 16:44:47 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							05d6065e2e
							
						
					 | 
					
						
						
							
							* Add assertion
						
						
						
						
						
					 | 
					
						2015-03-26 16:44:46 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							377e9b29b1
							
						
					 | 
					
						
						
							
							* Whitespace
						
						
						
						
						
					 | 
					
						2015-03-26 16:44:46 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							9f4ad8fdfb
							
						
					 | 
					
						
						
							
							* Assign root words the ROOT label via the Break transition. Something is still wrong here...
						
						
						
						
						
					 | 
					
						2015-03-26 16:44:46 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							f729164c01
							
						
					 | 
					
						
						
							
							* Fix bug in label assignment: ensure null-label transitions receive the label 0
						
						
						
						
						
					 | 
					
						2015-03-26 16:44:46 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							31fad99518
							
						
					 | 
					
						
						
							
							* Use StringStore to encode label names, instead of label_ids
						
						
						
						
						
					 | 
					
						2015-03-26 16:44:45 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							b9b695fb1b
							
						
					 | 
					
						
						
							
							* Remove debug word list
						
						
						
						
						
					 | 
					
						2015-03-26 16:44:45 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							1c843934be
							
						
					 | 
					
						
						
							
							* Fix oracle bug in NER. Now getting 77% F on ontonotes
						
						
						
						
						
					 | 
					
						2015-03-26 16:44:44 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							e181c051d5
							
						
					 | 
					
						
						
							
							* Improve features for NER
						
						
						
						
						
					 | 
					
						2015-03-26 16:44:44 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							8057a95f20
							
						
					 | 
					
						
						
							
							* NER seems to be working, scoring 69 F. Need to add decision-history features --- currently only use current word, 2 words context. Need refactoring.
						
						
						
						
						
					 | 
					
						2015-03-26 16:44:44 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							ae235e07b9
							
						
					 | 
					
						
						
							
							* Refactoring working for parser, but now need to rig up features for NER, and then debug oracle etc.
						
						
						
						
						
					 | 
					
						2015-03-26 16:44:44 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							b3eda03c9c
							
						
					 | 
					
						
						
							
							* Tmp
						
						
						
						
						
					 | 
					
						2015-03-26 16:44:44 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							6b6bce9e7a
							
						
					 | 
					
						
						
							
							* Fix label loading for transition system
						
						
						
						
						
					 | 
					
						2015-03-26 16:44:43 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							5278c7504b
							
						
					 | 
					
						
						
							
							* Hacks to conll.pyx. Should clean these up.
						
						
						
						
						
					 | 
					
						2015-03-26 16:44:43 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							f321b2b2eb
							
						
					 | 
					
						
						
							
							* Remove TODO comment
						
						
						
						
						
					 | 
					
						2015-03-26 16:44:43 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							fdabd93bfb
							
						
					 | 
					
						
						
							
							* Ensure high loss for invalid moves, and fix label reading for arc-eager
						
						
						
						
						
					 | 
					
						2015-03-26 16:44:43 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							10ed738df2
							
						
					 | 
					
						
						
							
							* Tmp commit
						
						
						
						
						
					 | 
					
						2015-03-26 16:44:43 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							4f83c9b3d5
							
						
					 | 
					
						
						
							
							* Make costs label-sensitive
						
						
						
						
						
					 | 
					
						2015-03-26 16:44:43 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							8c883cef58
							
						
					 | 
					
						
						
							
							* Refactored transition system code now compiling. Still need to hook up label oracle, and test
						
						
						
						
						
					 | 
					
						2015-03-26 16:44:43 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							f0159ab4b6
							
						
					 | 
					
						
						
							
							* Add file to hold GoldParse class
						
						
						
						
						
					 | 
					
						2015-03-26 16:44:42 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							8eadb984cb
							
						
					 | 
					
						
						
							
							* Refactor arc_eager to use new TransitionSystem base class. Need to fix oracle
						
						
						
						
						
					 | 
					
						2015-03-26 16:44:42 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							b063001596
							
						
					 | 
					
						
						
							
							* Add base TransitionSystem class. Still need to rethink how non-monotonic labelling will work for best_valid
						
						
						
						
						
					 | 
					
						2015-03-26 16:44:42 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							dc986dbc0b
							
						
					 | 
					
						
						
							
							* Work on refactored parser, where TransitionSystem can be easily subclassed
						
						
						
						
						
					 | 
					
						2015-03-26 16:44:42 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							135756ac3d
							
						
					 | 
					
						
						
							
							* Tmp commit of NER refactoring
						
						
						
						
						
					 | 
					
						2015-03-26 16:44:42 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							0ff078876a
							
						
					 | 
					
						
						
							
							* Commit some work on ner.yx done on the plane
						
						
						
						
						
					 | 
					
						2015-03-26 16:44:41 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							d81b7be6a2
							
						
					 | 
					
						
						
							
							* Merge train.py
						
						
						
						
						
					 | 
					
						2015-03-26 16:44:41 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							3d0570685c
							
						
					 | 
					
						
						
							
							* Add NER transition system
						
						
						
						
						
					 | 
					
						2015-03-26 16:44:41 +01:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							ea90d136e8
							
						
					 | 
					
						
						
							
							* Fix bug in labelled parsing, that caused an 8% drop in labelled accuracy.
						
						
						
						
						
					 | 
					
						2015-02-27 03:56:10 -05:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							312b3a45f3
							
						
					 | 
					
						
						
							
							* Fix issue #19: Allow parsing/pos tagging of empty strings
						
						
						
						
						
					 | 
					
						2015-02-10 10:15:58 -05:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							5c3513583d
							
						
					 | 
					
						
						
							
							* Clear buffered python tokens when modifying the Tokens object. Need to clean this up, and modify via a method on Tokens.
						
						
						
						
						
					 | 
					
						2015-02-09 03:57:10 -05:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							c55a33d045
							
						
					 | 
					
						
						
							
							* Catch oracle errors
						
						
						
						
						
					 | 
					
						2015-02-02 23:02:04 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							d68678a93e
							
						
					 | 
					
						
						
							
							* Add Exception class, OracleError
						
						
						
						
						
					 | 
					
						2015-02-02 11:57:32 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							88170e6295
							
						
					 | 
					
						
						
							
							* Supply dep_strings as a tuple, for the changed API on Tokens
						
						
						
						
						
					 | 
					
						2015-01-31 13:42:09 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							0981d68022
							
						
					 | 
					
						
						
							
							* Set a sent_end flag during parsing, for later use
						
						
						
						
						
					 | 
					
						2015-01-31 13:41:46 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							0f95712189
							
						
					 | 
					
						
						
							
							* Improve accuracy reporting during training
						
						
						
						
						
					 | 
					
						2015-01-30 18:05:06 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							67d6e53a69
							
						
					 | 
					
						
						
							
							* Ensure parser and tagger function correctly when training from missing values, indicated by -1
						
						
						
						
						
					 | 
					
						2015-01-30 14:08:56 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							ebf7d2fab1
							
						
					 | 
					
						
						
							
							* Use non-joint sbd, for more simplicity and fewer classes
						
						
						
						
						
					 | 
					
						2015-01-29 06:22:03 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							d05c5bf141
							
						
					 | 
					
						
						
							
							* Remove comment
						
						
						
						
						
					 | 
					
						2015-01-29 05:19:27 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							320b045daa
							
						
					 | 
					
						
						
							
							* Oracle now consistent over gold standard derivation
						
						
						
						
						
					 | 
					
						2015-01-29 03:41:58 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							f590382134
							
						
					 | 
					
						
						
							
							* Work on sbd
						
						
						
						
						
					 | 
					
						2015-01-29 03:18:29 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							1884a7a0be
							
						
					 | 
					
						
						
							
							* Attach comment with paper
						
						
						
						
						
					 | 
					
						2015-01-28 03:18:43 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							a2d6b195db
							
						
					 | 
					
						
						
							
							* Add messy Break transitions, carefully following the scheme of Dd Zhang et al (2013)
						
						
						
						
						
					 | 
					
						2015-01-28 03:09:45 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							f9ee5d9934
							
						
					 | 
					
						
						
							
							* Build a python list of word strings, for debugging
						
						
						
						
						
					 | 
					
						2015-01-28 01:06:13 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							d819101571
							
						
					 | 
					
						
						
							
							* Improve error message on oracle failure
						
						
						
						
						
					 | 
					
						2015-01-28 00:58:03 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							7431c133d8
							
						
					 | 
					
						
						
							
							* Add error if try to access head and not is_parsed
						
						
						
						
						
					 | 
					
						2015-01-25 15:33:54 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							a97bed9359
							
						
					 | 
					
						
						
							
							* Fix POS and dependency label tag names.  Add parse and string navigation functions.
						
						
						
						
						
					 | 
					
						2015-01-24 17:29:04 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							5ed8b2b98f
							
						
					 | 
					
						
						
							
							* Rename sic to orth
						
						
						
						
						
					 | 
					
						2015-01-23 02:08:25 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							6c7e44140b
							
						
					 | 
					
						
						
							
							* Work on word vectors, and other stuff
						
						
						
						
						
					 | 
					
						2015-01-17 16:21:17 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							aacaf1a0f0
							
						
					 | 
					
						
						
							
							* Fix parser
						
						
						
						
						
					 | 
					
						2015-01-08 01:19:23 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							9a21127bf7
							
						
					 | 
					
						
						
							
							* Fix parser, which was importing the wrong model
						
						
						
						
						
					 | 
					
						2015-01-08 00:10:15 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							3f1944d688
							
						
					 | 
					
						
						
							
							* Make PyPy work
						
						
						
						
						
					 | 
					
						2015-01-05 17:54:38 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							ae7c811fd1
							
						
					 | 
					
						
						
							
							* Use Exception instead of StandardError
						
						
						
						
						
					 | 
					
						2015-01-04 01:22:12 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							5d9a096e2f
							
						
					 | 
					
						
						
							
							* Some minor clean-up after HastyModel
						
						
						
						
						
					 | 
					
						2014-12-31 19:46:04 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							aafaf58cbe
							
						
					 | 
					
						
						
							
							* Refactor _ml.Model, and finish implementing HastyModel so far not worthwhile.
						
						
						
						
						
					 | 
					
						2014-12-31 19:40:59 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							1ffb0229ed
							
						
					 | 
					
						
						
							
							* Import tokens in parser.pxd
						
						
						
						
						
					 | 
					
						2014-12-30 21:21:17 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							bb80937544
							
						
					 | 
					
						
						
							
							* Upd docstrings
						
						
						
						
						
					 | 
					
						2014-12-27 18:45:16 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							b8b65903fc
							
						
					 | 
					
						
						
							
							* Tmp
						
						
						
						
						
					 | 
					
						2014-12-24 17:42:00 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							4c4aa2c5c9
							
						
					 | 
					
						
						
							
							* Work on train
						
						
						
						
						
					 | 
					
						2014-12-22 07:25:43 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							b34a1325d3
							
						
					 | 
					
						
						
							
							* Everything compiling after reorg. About to start testing.
						
						
						
						
						
					 | 
					
						2014-12-21 05:42:23 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							e1c1a4b868
							
						
					 | 
					
						
						
							
							* Tmp
						
						
						
						
						
					 | 
					
						2014-12-21 05:36:29 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							ff252dd535
							
						
					 | 
					
						
						
							
							* Clean up 'guess_cache' idea, which didnt work well enough
						
						
						
						
						
					 | 
					
						2014-12-20 03:49:11 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							bed680c632
							
						
					 | 
					
						
						
							
							* Remove commented-out features
						
						
						
						
						
					 | 
					
						2014-12-20 03:47:32 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							3d178c03ae
							
						
					 | 
					
						
						
							
							* Prune the features a bit
						
						
						
						
						
					 | 
					
						2014-12-20 02:46:14 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							7920ea72b4
							
						
					 | 
					
						
						
							
							* Working parser with the decision memory idea. Disabling that for now, for simplicity
						
						
						
						
						
					 | 
					
						2014-12-20 01:43:15 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							a2f2a48da9
							
						
					 | 
					
						
						
							
							* Add some extra features
						
						
						
						
						
					 | 
					
						2014-12-20 01:42:24 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							53b8bc1f3c
							
						
					 | 
					
						
						
							
							* Work on implementing a trainable cache for the parser. So far, doesn't improve efficiency
						
						
						
						
						
					 | 
					
						2014-12-19 09:30:50 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							f72243b156
							
						
					 | 
					
						
						
							
							* Set const-correctness for Feature* array
						
						
						
						
						
					 | 
					
						2014-12-18 20:41:32 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							6ab7e40590
							
						
					 | 
					
						
						
							
							* Add non-monotonic parsing with cost-sensitive update. 92.26 on Y&M set
						
						
						
						
						
					 | 
					
						2014-12-18 11:33:25 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							7e0c692daf
							
						
					 | 
					
						
						
							
							* Automatically push when the stack is empty
						
						
						
						
						
					 | 
					
						2014-12-18 09:16:10 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							61142a8eff
							
						
					 | 
					
						
						
							
							* Tweak features
						
						
						
						
						
					 | 
					
						2014-12-18 09:15:03 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							8446ebfbbb
							
						
					 | 
					
						
						
							
							* Work on parser. Up to 92 UAS on YM labels
						
						
						
						
						
					 | 
					
						2014-12-18 09:05:31 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							55de747bfc
							
						
					 | 
					
						
						
							
							* Remove .cpp files
						
						
						
						
						
					 | 
					
						2014-12-18 02:43:13 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							4448a840f7
							
						
					 | 
					
						
						
							
							* Work on greedy parsing. Scoring about 91.2
						
						
						
						
						
					 | 
					
						2014-12-18 02:42:55 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							9d7d97978d
							
						
					 | 
					
						
						
							
							* Work on greedy parser
						
						
						
						
						
					 | 
					
						2014-12-17 21:09:29 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							d524dd306a
							
						
					 | 
					
						
						
							
							* Work on greedy parser
						
						
						
						
						
					 | 
					
						2014-12-17 03:19:43 +11:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								Matthew Honnibal
							
						 
					 | 
					
						
						
						
						
							
						
						
							95ccea03b2
							
						
					 | 
					
						
						
							
							* Work on greedy parser
						
						
						
						
						
					 | 
					
						2014-12-16 22:46:55 +11:00 | 
					
					
						
						
							
							
							
						
					 |