mirror of
https://github.com/explosion/spaCy.git
synced 2024-12-26 01:46:28 +03:00
* Add cluster words to probs in init_model
This commit is contained in:
parent
bee2e77983
commit
da4821fc14
|
@@ -115,6 +115,14 @@ def setup_vocab(src_dir, dst_dir):
|
||||||
vocab = Vocab(data_dir=None, get_lex_props=get_lex_props)
|
vocab = Vocab(data_dir=None, get_lex_props=get_lex_props)
|
||||||
clusters = _read_clusters(src_dir / 'clusters.txt')
|
clusters = _read_clusters(src_dir / 'clusters.txt')
|
||||||
probs = _read_probs(src_dir / 'words.sgt.prob')
|
probs = _read_probs(src_dir / 'words.sgt.prob')
|
||||||
|
if not probs:
|
||||||
|
min_prob = 0.0
|
||||||
|
else:
|
||||||
|
min_prob = min(probs.values())
|
||||||
|
for word in clusters:
|
||||||
|
if word not in probs:
|
||||||
|
probs[word] = min_prob
|
||||||
|
|
||||||
lexicon = []
|
lexicon = []
|
||||||
for word, prob in reversed(sorted(probs.items(), key=lambda item: item[1])):
|
for word, prob in reversed(sorted(probs.items(), key=lambda item: item[1])):
|
||||||
entry = get_lex_props(word)
|
entry = get_lex_props(word)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user