mirror of
https://github.com/explosion/spaCy.git
synced 2024-11-14 05:37:03 +03:00
* Load adverb senses
This commit is contained in:
parent
427ea16b27
commit
211058f7a6
|
@@ -82,7 +82,7 @@ def _read_probs(loc):
|
|||
|
||||
def _read_senses(loc):
|
||||
lexicon = defaultdict(lambda: defaultdict(list))
|
||||
pos_tags = [None, NOUN, VERB, ADJ, ADV, None]
|
||||
pos_tags = [None, NOUN, VERB, ADJ, ADV, ADJ]
|
||||
for line in codecs.open(str(loc), 'r', 'utf8'):
|
||||
sense_key, synset_offset, sense_number, tag_cnt = line.split()
|
||||
lemma, lex_sense = sense_key.split('%')
|
||||
|
@@ -123,13 +123,13 @@ def setup_vocab(src_dir, dst_dir):
|
|||
for lemma in lemmatizer(word.lower(), pos):
|
||||
lemmas.append(lemma)
|
||||
orth_senses.update(senses[lemma][pos])
|
||||
orth_senses.update(senses[word.lower()][ADV])
|
||||
entry['senses'] = list(sorted(orth_senses))
|
||||
vocab[word] = entry
|
||||
vocab.dump(str(dst_dir / 'lexemes.bin'))
|
||||
vocab.strings.dump(str(dst_dir / 'strings.txt'))
|
||||
|
||||
|
||||
|
||||
def main(lang_data_dir, corpora_dir, model_dir):
|
||||
model_dir = Path(model_dir)
|
||||
lang_data_dir = Path(lang_data_dir)
|
||||
|
|
Loading…
Reference in New Issue
Block a user