* Fix gather_freqs.py

This commit is contained in:
Matthew Honnibal 2016-02-04 20:21:58 +01:00
parent 48ce09687d
commit a66e2f2f53

View File

@@ -1,26 +1,28 @@
from __future__ import unicode_literals
import plac import plac
import io
def main(in_loc, out_loc):
    """Collapse runs of per-key counts from ``in_loc`` into ``out_loc``.

    Every non-blank input line must be ``<freq><TAB><key>``. Consecutive
    lines that carry the same key are merged into a single output line
    ``<summed freq><TAB><df><TAB><key>``, where ``df`` is the number of
    input lines that were merged. Only *adjacent* lines are merged, so the
    input has to be grouped by key for the totals to be per-key totals.

    Both files are read and written as UTF-8. Empty (or all-blank) input
    produces an empty output file.

    Args:
        in_loc: Path of the tab-separated input file.
        out_loc: Path of the output file; it is created or truncated.

    Raises:
        ValueError: If a non-blank line has no tab separator or its first
            field is not an integer.
    """
    this_key = None
    this_freq = 0
    df = 0
    with io.open(out_loc, 'w', encoding='utf8') as out_file:
        # Open the input with a context manager as well, so it is closed
        # deterministically even when a malformed line raises.
        with io.open(in_loc, encoding='utf8') as in_file:
            for line in in_file:
                line = line.strip()
                if not line:
                    continue
                freq, key = line.split('\t', 1)
                freq = int(freq)
                if this_key is not None and key != this_key:
                    # The key changed: flush the finished run, start a new one.
                    out_file.write(
                        '%d\t%d\t%s\n' % (this_freq, df, this_key))
                    this_freq = freq
                    df = 1
                else:
                    # First record overall, or another record of the same run.
                    this_freq += freq
                    df += 1
                this_key = key
        # Flush the last run. Skip it when no record was seen, otherwise a
        # bogus "0<TAB>0<TAB>None" row would be written for empty input.
        if this_key is not None:
            out_file.write('%d\t%d\t%s\n' % (this_freq, df, this_key))
if __name__ == '__main__': if __name__ == '__main__':