feature(populate_vocab): Enable pruning out rare words from clusters data

This commit is contained in:
oeg 2017-05-12 16:15:19 +02:00
parent 6e1fad92a1
commit cdaefae60a

View File

@@ -98,10 +98,6 @@ def read_clusters(clusters_path):
 def populate_vocab(vocab, clusters, probs, oov_prob):
-    # Ensure probs has entries for all words seen during clustering.
-    for word in clusters:
-        if word not in probs:
-            probs[word] = oov_prob
     for word, prob in reversed(sorted(list(probs.items()), key=lambda item: item[1])):
         lexeme = vocab[word]
         lexeme.prob = prob