Rename model command to init_model and fix formatting

This commit is contained in:
ines 2017-12-07 09:59:23 +01:00
parent 2feeb428d6
commit 82e80ff928
3 changed files with 13 additions and 17 deletions

View File

@ -7,7 +7,7 @@ if __name__ == '__main__':
import plac
import sys
from spacy.cli import download, link, info, package, train, convert
from spacy.cli import vocab, profile, evaluate, validate
from spacy.cli import vocab, init_model, profile, evaluate, validate
from spacy.util import prints
commands = {
@ -19,6 +19,7 @@ if __name__ == '__main__':
'convert': convert,
'package': package,
'vocab': vocab,
'init-model': init_model,
'profile': profile,
'validate': validate
}

View File

@ -7,4 +7,5 @@ from .train import train
from .evaluate import evaluate
from .convert import convert
from .vocab import make_vocab as vocab
from .init_model import init_model
from .validate import validate

View File

@ -3,18 +3,15 @@ from __future__ import unicode_literals
import plac
import math
from tqdm import tqdm
import spacy
import numpy
from ast import literal_eval
from pathlib import Path
from preshed.counter import PreshCounter
from spacy.compat import fix_text
from spacy.vectors import Vectors
from spacy.util import prints, ensure_path
from ...compat import fix_text
from ...vectors import Vectors
from ...util import prints, ensure_path, get_lang_class
@plac.annotations(
@ -29,7 +26,7 @@ from spacy.util import prints, ensure_path
prune_vectors=("optional: number of vectors to prune to",
"option", "V", int)
)
def main(lang, output_dir, freqs_loc, clusters_loc=None, vectors_loc=None, prune_vectors=-1):
def init_model(lang, output_dir, freqs_loc, clusters_loc=None, vectors_loc=None, prune_vectors=-1):
if not freqs_loc.exists():
prints(freqs_loc, title="Can't find words frequencies file", exits=1)
clusters_loc = ensure_path(clusters_loc)
@ -48,8 +45,9 @@ def main(lang, output_dir, freqs_loc, clusters_loc=None, vectors_loc=None, prune
def create_model(lang, probs, oov_prob, clusters, vectors_data, vector_keys, prune_vectors):
prints("Creating model...")
nlp = spacy.blank(lang)
print("Creating model...")
lang_class = get_lang_class(lang)
nlp = lang_class()
for lexeme in nlp.vocab:
lexeme.rank = 0
@ -80,7 +78,7 @@ def create_model(lang, probs, oov_prob, clusters, vectors_data, vector_keys, pru
def read_vectors(vectors_loc):
prints("Reading vectors...")
print("Reading vectors...")
with vectors_loc.open() as f:
shape = tuple(int(size) for size in f.readline().split())
vectors_data = numpy.zeros(shape=shape, dtype='f')
@ -94,7 +92,7 @@ def read_vectors(vectors_loc):
def read_freqs(freqs_loc, max_length=100, min_doc_freq=5, min_freq=50):
prints("Counting frequencies...")
print("Counting frequencies...")
counts = PreshCounter()
total = 0
with freqs_loc.open() as f:
@ -120,7 +118,7 @@ def read_freqs(freqs_loc, max_length=100, min_doc_freq=5, min_freq=50):
def read_clusters(clusters_loc):
prints("Reading clusters...")
print("Reading clusters...")
clusters = {}
with clusters_loc.open() as f:
for line in tqdm(f):
@ -144,7 +142,3 @@ def read_clusters(clusters_loc):
if word.upper() not in clusters:
clusters[word.upper()] = cluster
return clusters
if __name__ == '__main__':
plac.call(main)