mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-27 10:26:35 +03:00
* Fix gather_freqs.py
This commit is contained in:
parent
f9e765cae7
commit
5dc6cffc67
|
@ -1,11 +1,13 @@
|
||||||
|
from __future__ import unicode_literals
|
||||||
import plac
|
import plac
|
||||||
|
import io
|
||||||
|
|
||||||
def main(in_loc, out_loc):
|
def main(in_loc, out_loc):
|
||||||
out_file = open(out_loc, 'w')
|
|
||||||
this_key = None
|
this_key = None
|
||||||
this_freq = 0
|
this_freq = 0
|
||||||
df = 0
|
df = 0
|
||||||
for line in open(in_loc):
|
with io.open(out_loc, 'w', encoding='utf8') as out_file:
|
||||||
|
for line in io.open(in_loc, encoding='utf8'):
|
||||||
line = line.strip()
|
line = line.strip()
|
||||||
if not line:
|
if not line:
|
||||||
continue
|
continue
|
||||||
|
@ -19,8 +21,8 @@ def main(in_loc, out_loc):
|
||||||
else:
|
else:
|
||||||
this_freq += freq
|
this_freq += freq
|
||||||
df += 1
|
df += 1
|
||||||
|
this_key = key
|
||||||
out_file.write('%d\t%d\t%s\n' % (this_freq, df, this_key))
|
out_file.write('%d\t%d\t%s\n' % (this_freq, df, this_key))
|
||||||
out_file.close()
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
Loading…
Reference in New Issue
Block a user