From 174ed1ad20d37d5204fe60a6f939fb814186f791 Mon Sep 17 00:00:00 2001 From: Matthew Honnibal Date: Mon, 27 Jul 2015 21:44:51 +0200 Subject: [PATCH] * Tighten the frequency filter in init_model --- bin/init_model.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/init_model.py b/bin/init_model.py index c511398e5..db01885b3 100644 --- a/bin/init_model.py +++ b/bin/init_model.py @@ -89,7 +89,7 @@ def _read_probs(loc): return probs, probs['-OOV-'] -def _read_freqs(loc, max_length=100, min_doc_freq=5, min_freq=100): +def _read_freqs(loc, max_length=100, min_doc_freq=5, min_freq=200): if not loc.exists(): print("Warning: Frequencies file not found") return {}, 0.0 @@ -152,7 +152,7 @@ def setup_vocab(src_dir, dst_dir): clusters = _read_clusters(src_dir / 'clusters.txt') probs, oov_prob = _read_probs(src_dir / 'words.sgt.prob') if not probs: - probs, oov_prob = _read_freqs(src_dir / 'freqs.txt.gz') + probs, oov_prob = _read_freqs(src_dir / 'freqs.txt') if not probs: oov_prob = 0.0 else: