mirror of
https://github.com/explosion/spaCy.git
synced 2025-03-13 07:55:49 +03:00
* Load adverb senses
This commit is contained in:
parent
427ea16b27
commit
211058f7a6
|
@ -82,7 +82,7 @@ def _read_probs(loc):
|
||||||
|
|
||||||
def _read_senses(loc):
|
def _read_senses(loc):
|
||||||
lexicon = defaultdict(lambda: defaultdict(list))
|
lexicon = defaultdict(lambda: defaultdict(list))
|
||||||
pos_tags = [None, NOUN, VERB, ADJ, ADV, None]
|
pos_tags = [None, NOUN, VERB, ADJ, ADV, ADJ]
|
||||||
for line in codecs.open(str(loc), 'r', 'utf8'):
|
for line in codecs.open(str(loc), 'r', 'utf8'):
|
||||||
sense_key, synset_offset, sense_number, tag_cnt = line.split()
|
sense_key, synset_offset, sense_number, tag_cnt = line.split()
|
||||||
lemma, lex_sense = sense_key.split('%')
|
lemma, lex_sense = sense_key.split('%')
|
||||||
|
@ -123,13 +123,13 @@ def setup_vocab(src_dir, dst_dir):
|
||||||
for lemma in lemmatizer(word.lower(), pos):
|
for lemma in lemmatizer(word.lower(), pos):
|
||||||
lemmas.append(lemma)
|
lemmas.append(lemma)
|
||||||
orth_senses.update(senses[lemma][pos])
|
orth_senses.update(senses[lemma][pos])
|
||||||
|
orth_senses.update(senses[word.lower()][ADV])
|
||||||
entry['senses'] = list(sorted(orth_senses))
|
entry['senses'] = list(sorted(orth_senses))
|
||||||
vocab[word] = entry
|
vocab[word] = entry
|
||||||
vocab.dump(str(dst_dir / 'lexemes.bin'))
|
vocab.dump(str(dst_dir / 'lexemes.bin'))
|
||||||
vocab.strings.dump(str(dst_dir / 'strings.txt'))
|
vocab.strings.dump(str(dst_dir / 'strings.txt'))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def main(lang_data_dir, corpora_dir, model_dir):
|
def main(lang_data_dir, corpora_dir, model_dir):
|
||||||
model_dir = Path(model_dir)
|
model_dir = Path(model_dir)
|
||||||
lang_data_dir = Path(lang_data_dir)
|
lang_data_dir = Path(lang_data_dir)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user