svlandeg
							
						 
					 | 
					
						
						
						
						
							
						
						
							cd6c263fe4
							
						
					 | 
					
						
						
							
							format offsets
						
						
						
						
						
					 | 
					
						2019-07-23 11:31:29 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								svlandeg
							
						 
					 | 
					
						
						
						
						
							
						
						
							9f8c1e71a2
							
						
					 | 
					
						
						
							
							fix for Issue #4000
						
						
						
						
						
					 | 
					
						2019-07-22 13:34:12 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								svlandeg
							
						 
					 | 
					
						
						
						
						
							
						
						
							21176517a7
							
						
					 | 
					
						
						
							
							have gold.links correspond exactly to doc.ents
						
						
						
						
						
					 | 
					
						2019-07-19 12:36:15 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								svlandeg
							
						 
					 | 
					
						
						
						
						
							
						
						
							e1213eaf6a
							
						
					 | 
					
						
						
							
							use original gold object in get_loss function
						
						
						
						
						
					 | 
					
						2019-07-18 13:35:10 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								svlandeg
							
						 
					 | 
					
						
						
						
						
							
						
						
							ec55d2fccd
							
						
					 | 
					
						
						
							
							filter training data beforehand (+black formatting)
						
						
						
						
						
					 | 
					
						2019-07-18 10:22:24 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								svlandeg
							
						 
					 | 
					
						
						
						
						
							
						
						
							b7a0c9bf60
							
						
					 | 
					
						
						
							
							fixing the context/prior weight settings
						
						
						
						
						
					 | 
					
						2019-07-03 17:48:09 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								svlandeg
							
						 
					 | 
					
						
						
						
						
							
						
						
							8840d4b1b3
							
						
					 | 
					
						
						
							
							fix for context encoder optimizer
						
						
						
						
						
					 | 
					
						2019-07-03 13:35:36 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								svlandeg
							
						 
					 | 
					
						
						
						
						
							
						
						
							3420cbe496
							
						
					 | 
					
						
						
							
							small fixes
						
						
						
						
						
					 | 
					
						2019-07-03 10:25:51 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								svlandeg
							
						 
					 | 
					
						
						
						
						
							
						
						
							2d2dea9924
							
						
					 | 
					
						
						
							
							experiment with adding NER types to the feature vector
						
						
						
						
						
					 | 
					
						2019-06-29 14:52:36 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								svlandeg
							
						 
					 | 
					
						
						
						
						
							
						
						
							c664f58246
							
						
					 | 
					
						
						
							
							adding prior probability as feature in the model
						
						
						
						
						
					 | 
					
						2019-06-28 16:22:58 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								svlandeg
							
						 
					 | 
					
						
						
						
						
							
						
						
							68a0662019
							
						
					 | 
					
						
						
							
							context encoder with Tok2Vec + linking model instead of cosine
						
						
						
						
						
					 | 
					
						2019-06-28 08:29:31 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								svlandeg
							
						 
					 | 
					
						
						
						
						
							
						
						
							dbc53b9870
							
						
					 | 
					
						
						
							
							rename to KBEntryC
						
						
						
						
						
					 | 
					
						2019-06-26 15:55:26 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								svlandeg
							
						 
					 | 
					
						
						
						
						
							
						
						
							1de61f68d6
							
						
					 | 
					
						
						
							
							improve speed of prediction loop
						
						
						
						
						
					 | 
					
						2019-06-26 13:53:10 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								svlandeg
							
						 
					 | 
					
						
						
						
						
							
						
						
							bee23cd8af
							
						
					 | 
					
						
						
							
							try Tok2Vec instead of SpacyVectors
						
						
						
						
						
					 | 
					
						2019-06-25 16:09:22 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								svlandeg
							
						 
					 | 
					
						
						
						
						
							
						
						
							b58bace84b
							
						
					 | 
					
						
						
							
							small fixes
						
						
						
						
						
					 | 
					
						2019-06-24 10:55:04 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								svlandeg
							
						 
					 | 
					
						
						
						
						
							
						
						
							a31648d28b
							
						
					 | 
					
						
						
							
							further code cleanup
						
						
						
						
						
					 | 
					
						2019-06-19 09:15:43 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								svlandeg
							
						 
					 | 
					
						
						
						
						
							
						
						
							478305cd3f
							
						
					 | 
					
						
						
							
							small tweaks and documentation
						
						
						
						
						
					 | 
					
						2019-06-18 18:38:09 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								svlandeg
							
						 
					 | 
					
						
						
						
						
							
						
						
							0d177c1146
							
						
					 | 
					
						
						
							
							clean up code, remove old code, move to bin
						
						
						
						
						
					 | 
					
						2019-06-18 13:20:40 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								svlandeg
							
						 
					 | 
					
						
						
						
						
							
						
						
							6961215578
							
						
					 | 
					
						
						
							
							refactor code to separate functionality into different files
						
						
						
						
						
					 | 
					
						2019-05-06 10:56:56 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								svlandeg
							
						 
					 | 
					
						
						
						
						
							
						
						
							f5190267e7
							
						
					 | 
					
						
						
							
							run only 100M of WP data as training dataset (9%)
						
						
						
						
						
					 | 
					
						2019-05-03 18:09:09 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								svlandeg
							
						 
					 | 
					
						
						
						
						
							
						
						
							4e929600e5
							
						
					 | 
					
						
						
							
							fix WP id parsing, speed up processing and remove ambiguous strings in one doc (for now)
						
						
						
						
						
					 | 
					
						2019-05-03 17:37:47 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								svlandeg
							
						 
					 | 
					
						
						
						
						
							
						
						
							34600c92bd
							
						
					 | 
					
						
						
							
							try catch per article to ensure the pipeline goes on
						
						
						
						
						
					 | 
					
						2019-05-03 15:10:09 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								svlandeg
							
						 
					 | 
					
						
						
						
						
							
						
						
							bbcb9da466
							
						
					 | 
					
						
						
							
							creating training data with clean WP texts and QID entities true/false
						
						
						
						
						
					 | 
					
						2019-05-03 10:44:29 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								svlandeg
							
						 
					 | 
					
						
						
						
						
							
						
						
							cba9680d13
							
						
					 | 
					
						
						
							
							run NER on clean WP text and link to gold-standard entity IDs
						
						
						
						
						
					 | 
					
						2019-05-02 17:24:52 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								svlandeg
							
						 
					 | 
					
						
						
						
						
							
						
						
							581dc9742d
							
						
					 | 
					
						
						
							
							parsing clean text from WP articles to use as input data for NER and NEL
						
						
						
						
						
					 | 
					
						2019-05-02 17:09:56 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								svlandeg
							
						 
					 | 
					
						
						
						
						
							
						
						
							8353552191
							
						
					 | 
					
						
						
							
							cleanup
						
						
						
						
						
					 | 
					
						2019-05-01 23:26:16 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								svlandeg
							
						 
					 | 
					
						
						
						
						
							
						
						
							1ae41daaa9
							
						
					 | 
					
						
						
							
							allow small rounding errors
						
						
						
						
						
					 | 
					
						2019-05-01 23:05:40 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								svlandeg
							
						 
					 | 
					
						
						
						
						
							
						
						
							3629a52ede
							
						
					 | 
					
						
						
							
							reading all persons in wikidata
						
						
						
						
						
					 | 
					
						2019-05-01 01:00:59 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								svlandeg
							
						 
					 | 
					
						
						
						
						
							
						
						
							60b54ae8ce
							
						
					 | 
					
						
						
							
							bulk entity writing and experiment with regex wikidata reader to speed up processing
						
						
						
						
						
					 | 
					
						2019-05-01 00:00:38 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								svlandeg
							
						 
					 | 
					
						
						
						
						
							
						
						
							653b7d9c87
							
						
					 | 
					
						
						
							
							calculate entity raw counts offline to speed up KB construction
						
						
						
						
						
					 | 
					
						2019-04-30 11:39:42 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								svlandeg
							
						 
					 | 
					
						
						
						
						
							
						
						
							19e8f339cb
							
						
					 | 
					
						
						
							
							deduce entity freq from WP corpus and serialize vocab in WP test
						
						
						
						
						
					 | 
					
						2019-04-29 17:37:29 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								svlandeg
							
						 
					 | 
					
						
						
						
						
							
						
						
							54d0cea062
							
						
					 | 
					
						
						
							
							unit test for KB serialization
						
						
						
						
						
					 | 
					
						2019-04-24 23:52:34 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								svlandeg
							
						 
					 | 
					
						
						
						
						
							
						
						
							3e0cb69065
							
						
					 | 
					
						
						
							
							KB aliases to and from file
						
						
						
						
						
					 | 
					
						2019-04-24 20:24:24 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								svlandeg
							
						 
					 | 
					
						
						
						
						
							
						
						
							ad6c5e581c
							
						
					 | 
					
						
						
							
							writing and reading number of entries to/from header
						
						
						
						
						
					 | 
					
						2019-04-24 15:31:44 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								svlandeg
							
						 
					 | 
					
						
						
						
						
							
						
						
							6e3223f234
							
						
					 | 
					
						
						
							
							bulk loading in proper order of entity indices
						
						
						
						
						
					 | 
					
						2019-04-24 11:26:38 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								svlandeg
							
						 
					 | 
					
						
						
						
						
							
						
						
							694fea597a
							
						
					 | 
					
						
						
							
							dumping all entryC entries + (inefficient) reading back in
						
						
						
						
						
					 | 
					
						2019-04-23 18:36:50 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								svlandeg
							
						 
					 | 
					
						
						
						
						
							
						
						
							8e70a564f1
							
						
					 | 
					
						
						
							
							custom reader and writer for _EntryC fields (first stab at it - not complete)
						
						
						
						
						
					 | 
					
						2019-04-23 16:33:40 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								svlandeg
							
						 
					 | 
					
						
						
						
						
							
						
						
							004e5e7d1c
							
						
					 | 
					
						
						
							
							little fixes
						
						
						
						
						
					 | 
					
						2019-04-19 14:24:02 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								svlandeg
							
						 
					 | 
					
						
						
						
						
							
						
						
							9a8197185b
							
						
					 | 
					
						
						
							
							fix alias capitalization
						
						
						
						
						
					 | 
					
						2019-04-18 22:37:50 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								svlandeg
							
						 
					 | 
					
						
						
						
						
							
						
						
							9f308eb5dc
							
						
					 | 
					
						
						
							
							fixes for prior prob and linking wikidata IDs with wikipedia titles
						
						
						
						
						
					 | 
					
						2019-04-18 16:14:25 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								svlandeg
							
						 
					 | 
					
						
						
						
						
							
						
						
							10ee8dfea2
							
						
					 | 
					
						
						
							
							poc with few entities and collecting aliases from the WP links
						
						
						
						
						
					 | 
					
						2019-04-18 14:12:17 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								svlandeg
							
						 
					 | 
					
						
						
						
						
							
						
						
							6763e025e1
							
						
					 | 
					
						
						
							
							parse wp dump for links to determine prior probabilities
						
						
						
						
						
					 | 
					
						2019-04-15 11:41:57 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								svlandeg
							
						 
					 | 
					
						
						
						
						
							
						
						
							3163331b1e
							
						
					 | 
					
						
						
							
							wikipedia dump parser and mediawiki format regex cleanup
						
						
						
						
						
					 | 
					
						2019-04-14 21:52:01 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								svlandeg
							
						 
					 | 
					
						
						
						
						
							
						
						
							b31a390a9a
							
						
					 | 
					
						
						
							
							reading types, claims and sitelinks
						
						
						
						
						
					 | 
					
						2019-04-11 21:42:44 +02:00 | 
					
					
						
						
							
							
							
						
					 | 
				
			
				
					
						
							
							
								 
								svlandeg
							
						 
					 | 
					
						
						
						
						
							
						
						
							6e997be4b4
							
						
					 | 
					
						
						
							
							reading wikidata descriptions and aliases
						
						
						
						
						
					 | 
					
						2019-04-11 21:08:22 +02:00 | 
					
					
						
						
							
							
							
						
					 |