mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 09:57:26 +03:00 
			
		
		
		
	* Add simple script to collate frequencies from sorted file
This commit is contained in:
		
							parent
							
								
									823ef4a00b
								
							
						
					
					
						commit
						616445e027
					
				
							
								
								
									
										27
									
								
								bin/gather_freqs.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										27
									
								
								bin/gather_freqs.py
									
									
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,27 @@
 | 
				
			||||||
 | 
					import plac
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def main(in_loc, out_loc):
					    """Collate per-key frequencies from a file already sorted by key.
					    Each non-blank input line has the form ``<freq>\t<key>``. Consecutive
					    lines sharing the same key are merged into one output line of the form
					    ``<total_freq>\t<line_count>\t<key>``, where ``line_count`` is the
					    number of input lines that carried that key.
					    Args:
					        in_loc: Path of the input file, with equal keys on adjacent lines.
					        out_loc: Path of the output file; it is overwritten.
					    Raises:
					        ValueError: If a non-blank line has no tab separator or its
					            frequency field is not an integer.
					    An input with no records produces an empty output file.
					    """
					    this_key = None
					    this_freq = 0
					    df = 0
					    # Context managers close both handles even if a line fails to parse.
					    with open(in_loc) as in_file, open(out_loc, 'w') as out_file:
					        for line in in_file:
					            line = line.strip()
					            if not line:
					                continue
					            freq, key = line.split('\t', 1)
					            freq = int(freq)
					            if key == this_key:
					                # Same group as the previous record: keep accumulating.
					                this_freq += freq
					                df += 1
					                continue
					            # A new key starts here. Flush the finished group, except
					            # before the very first record, when there is none yet.
					            if this_key is not None:
					                out_file.write('%d\t%d\t%s\n' % (this_freq, df, this_key))
					            this_key = key
					            this_freq = freq
					            df = 1
					        # Flush the trailing group; skip it when no record was read.
					        if this_key is not None:
					            out_file.write('%d\t%d\t%s\n' % (this_freq, df, this_key))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Run as a script: plac builds the command-line interface from main's
					# signature and invokes it with the parsed arguments.
					if __name__ == "__main__":
					    plac.call(main)
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user