feature(populate_vocab): Enable pruning out rare words from clusters data

2025-12-22 17:43:13 +03:00 · 2017-05-12 16:15:19 +02:00 · 2017-05-12 16:15:19 +02:00 · cdaefae60a
commit cdaefae60a
parent 6e1fad92a1
1 changed files with 0 additions and 4 deletions
--- a/spacy/cli/model.py
+++ b/spacy/cli/model.py
@@ -98,10 +98,6 @@ def read_clusters(clusters_path):
 def populate_vocab(vocab, clusters, probs, oov_prob):
    # Ensure probs has entries for all words seen during clustering.
    for word in clusters:
        if word not in probs:
            probs[word] = oov_prob
    for word, prob in reversed(sorted(list(probs.items()), key=lambda item: item[1])):
        lexeme = vocab[word]
        lexeme.prob = prob