mirror of
https://github.com/explosion/spaCy.git
synced 2025-04-05 09:44:12 +03:00
Merge pull request #1055 from recognai/master
Enable pruning out rare words from clusters data
This commit is contained in:
commit
8d742ac8ff
|
@@ -98,10 +98,6 @@ def read_clusters(clusters_path):
|
|||
|
||||
|
||||
def populate_vocab(vocab, clusters, probs, oov_prob):
|
||||
# Ensure probs has entries for all words seen during clustering.
|
||||
for word in clusters:
|
||||
if word not in probs:
|
||||
probs[word] = oov_prob
|
||||
for word, prob in reversed(sorted(list(probs.items()), key=lambda item: item[1])):
|
||||
lexeme = vocab[word]
|
||||
lexeme.prob = prob
|
||||
|
|
Loading…
Reference in New Issue
Block a user