mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-10 19:57:17 +03:00
Delete gather_freqs.py
This script was in a broken state and should be unnecessary; its functionality is subsumed by `get_freqs.py`.
This commit is contained in:
parent
ae2b479312
commit
9d51e4d13c
|
@ -1,29 +0,0 @@
|
|||
from __future__ import unicode_literals
|
||||
import plac
|
||||
import io
|
||||
|
||||
def main(in_loc, out_loc):
    """Collapse runs of identical keys into one summed record per run.

    Each non-blank line of the UTF-8 file ``in_loc`` must have the form
    ``freq<TAB>key``, where ``freq`` is an integer. Consecutive lines that
    share the same key are merged; lines with the same key that are NOT
    adjacent produce separate records, so the input is expected to be
    grouped (e.g. sorted) by key.

    For every run, one line ``total_freq<TAB>line_count<TAB>key`` is written
    to ``out_loc`` (UTF-8, overwritten), where ``line_count`` is the number
    of input lines merged into the record.

    Args:
        in_loc: Path of the input file.
        out_loc: Path of the output file.

    Raises:
        ValueError: If a non-blank line has no tab separator or its first
            field is not an integer.
    """
    this_key = None
    this_freq = 0
    df = 0  # number of input lines merged into the current record
    # Both handles live in the ``with`` so the input file is closed as well.
    # The input is opened first: a missing input fails before the output
    # file is truncated.
    with io.open(in_loc, encoding='utf8') as in_file, \
            io.open(out_loc, 'w', encoding='utf8') as out_file:
        for line in in_file:
            line = line.strip()
            if not line:
                continue
            freq, key = line.split('\t', 1)
            freq = int(freq)
            if this_key is not None and key != this_key:
                # Key changed: flush the finished run and start a new one.
                out_file.write('%d\t%d\t%s\n' % (this_freq, df, this_key))
                this_freq = 0
                df = 0
            this_key = key
            this_freq += freq
            df += 1
        # Flush the final run. Skip it when no record was read at all, so an
        # empty input does not yield a bogus "0<TAB>0<TAB>None" line.
        if this_key is not None:
            out_file.write('%d\t%d\t%s\n' % (this_freq, df, this_key))
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Run as a script only: plac.call invokes main, taking its arguments
    # (in_loc, out_loc) from the command line.
    plac.call(main)
|
Loading…
Reference in New Issue
Block a user