mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 01:46:28 +03:00
* Upd get_freqs script
This commit is contained in:
parent
2b7bd46508
commit
105305b4aa
|
@ -40,6 +40,7 @@ def null_props(string):
|
|||
|
||||
|
||||
def count_freqs(input_loc, output_loc):
|
||||
print output_loc
|
||||
nlp = spacy.en.English(Parser=None, Tagger=None, Entity=None, load_vectors=False)
|
||||
nlp.vocab.lexeme_props_getter = null_props
|
||||
|
||||
|
@ -94,8 +95,10 @@ def main(input_loc, freqs_dir, output_loc, n_jobs=2, skip_existing=False):
|
|||
if not path.exists(output_path) or not skip_existing:
|
||||
tasks.append((input_path, output_path))
|
||||
|
||||
if tasks:
|
||||
parallelize(count_freqs, tasks, n_jobs)
|
||||
|
||||
print "Merge"
|
||||
merge_counts(outputs, output_loc)
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue
Block a user