* Load adverb senses

This commit is contained in:
Matthew Honnibal 2015-07-05 09:13:22 +02:00
parent 427ea16b27
commit 211058f7a6

View File

@@ -82,7 +82,7 @@ def _read_probs(loc):
def _read_senses(loc):
lexicon = defaultdict(lambda: defaultdict(list))
pos_tags = [None, NOUN, VERB, ADJ, ADV, None]
pos_tags = [None, NOUN, VERB, ADJ, ADV, ADJ]
for line in codecs.open(str(loc), 'r', 'utf8'):
sense_key, synset_offset, sense_number, tag_cnt = line.split()
lemma, lex_sense = sense_key.split('%')
@@ -123,13 +123,13 @@ def setup_vocab(src_dir, dst_dir):
for lemma in lemmatizer(word.lower(), pos):
lemmas.append(lemma)
orth_senses.update(senses[lemma][pos])
orth_senses.update(senses[word.lower()][ADV])
entry['senses'] = list(sorted(orth_senses))
vocab[word] = entry
vocab.dump(str(dst_dir / 'lexemes.bin'))
vocab.strings.dump(str(dst_dir / 'strings.txt'))
def main(lang_data_dir, corpora_dir, model_dir):
model_dir = Path(model_dir)
lang_data_dir = Path(lang_data_dir)