From cdaefae60ac08fc0093f86a83d0c5953197eb9fd Mon Sep 17 00:00:00 2001
From: oeg <daniel@recogn.ai>
Date: Fri, 12 May 2017 16:15:19 +0200
Subject: [PATCH] feature(populate_vocab): Enable pruning of rare words from
 cluster data

---
 spacy/cli/model.py | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/spacy/cli/model.py b/spacy/cli/model.py
index 3b9a77b93..4e7e0282b 100644
--- a/spacy/cli/model.py
+++ b/spacy/cli/model.py
@@ -98,10 +98,6 @@ def read_clusters(clusters_path):
 
 
 def populate_vocab(vocab, clusters, probs, oov_prob):
-    # Ensure probs has entries for all words seen during clustering.
-    for word in clusters:
-        if word not in probs:
-            probs[word] = oov_prob
     for word, prob in reversed(sorted(list(probs.items()), key=lambda item: item[1])):
         lexeme = vocab[word]
         lexeme.prob = prob