From cdaefae60ac08fc0093f86a83d0c5953197eb9fd Mon Sep 17 00:00:00 2001 From: oeg Date: Fri, 12 May 2017 16:15:19 +0200 Subject: [PATCH] feature(populate_vocab): Enable pruning out rare words from clusters data --- spacy/cli/model.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/spacy/cli/model.py b/spacy/cli/model.py index 3b9a77b93..4e7e0282b 100644 --- a/spacy/cli/model.py +++ b/spacy/cli/model.py @@ -98,10 +98,6 @@ def read_clusters(clusters_path): def populate_vocab(vocab, clusters, probs, oov_prob): - # Ensure probs has entries for all words seen during clustering. - for word in clusters: - if word not in probs: - probs[word] = oov_prob for word, prob in reversed(sorted(list(probs.items()), key=lambda item: item[1])): lexeme = vocab[word] lexeme.prob = prob