import io


def main(in_loc, out_loc):
    """Collapse runs of adjacent same-key lines in a frequency file.

    Every non-blank line of the input is parsed as ``<freq> TAB <key>``
    (the line is whitespace-stripped first, then split on the first tab).
    Consecutive lines carrying the same key are merged into one output
    line ``<summed freq> TAB <lines in run> TAB <key>``. Only *adjacent*
    lines are merged, so a key that reappears later starts a new record;
    callers that want one record per key must pass input grouped by key.

    Args:
        in_loc: Path of the input file, read as UTF-8.
        out_loc: Path of the output file, written as UTF-8 (truncated).

    Raises:
        ValueError: If a non-blank line has no tab separator or its first
            field is not an integer.
    """
    record = '%d\t%d\t%s\n'
    this_key = None
    this_freq = 0
    df = 0
    # Open the input first so a missing/unreadable input does not truncate
    # an existing output file, and manage both handles with the context
    # manager so neither is left open on error.
    with io.open(in_loc, encoding='utf8') as in_file, \
            io.open(out_loc, 'w', encoding='utf8') as out_file:
        for line in in_file:
            line = line.strip()
            if not line:
                continue
            freq, key = line.split('\t', 1)
            freq = int(freq)
            if key != this_key:
                # Run boundary: flush the finished run (if there was one)
                # and start accumulating for the new key.
                if this_key is not None:
                    out_file.write(record % (this_freq, df, this_key))
                this_key = key
                this_freq = 0
                df = 0
            this_freq += freq
            df += 1
        # Flush the last run; skipped when the input had no records so we
        # never emit a placeholder "0 0 None" line.
        if this_key is not None:
            out_file.write(record % (this_freq, df, this_key))