Merge pull request #1055 from recognai/master

Enable pruning out rare words from clusters data
This commit is contained in:
Ines Montani 2017-05-13 03:22:56 +02:00 committed by GitHub
commit 8d742ac8ff

View File

@@ -98,10 +98,6 @@ def read_clusters(clusters_path):
def populate_vocab(vocab, clusters, probs, oov_prob):
# Ensure probs has entries for all words seen during clustering.
for word in clusters:
if word not in probs:
probs[word] = oov_prob
for word, prob in reversed(sorted(list(probs.items()), key=lambda item: item[1])):
lexeme = vocab[word]
lexeme.prob = prob