first idea for supporting multiple langs in download script

This commit is contained in:
Henning Peters 2016-03-24 11:19:43 +01:00
parent 8f870854c4
commit a7d7ea3afa
6 changed files with 76 additions and 56 deletions

View File

@ -1,8 +1,13 @@
from . import util
from .en import English
from .about import __models__
import importlib
def load(name, vectors=None, via=None):
    """Instantiate the language class registered for *name*.

    The entry for *name* in ``__models__`` supplies the sub-module of the
    ``spacy`` package to import and the class to pull out of it.

    Args:
        name: Key into ``__models__`` (e.g. ``'en'``).
        vectors: Optional package name for word vectors; passed unchanged to
            ``util.get_package_by_name``.
        via: Optional data path forwarded to ``util.get_package_by_name``.

    Returns:
        An instance of the registered language class, constructed with the
        resolved ``package`` and ``vectors_package``.

    Raises:
        Exception: If *name* is not a key of ``__models__``.
    """
    if name not in __models__:
        raise Exception('Model %s not found.' % name)

    model = __models__[name]
    # Relative import anchored at the 'spacy' package, e.g. '.en' -> spacy.en.
    mod = importlib.import_module('.%s' % model['module'], 'spacy')
    language_cls = getattr(mod, model['class'])
    return language_cls(
        package=util.get_package_by_name(name, via=via),
        vectors_package=util.get_package_by_name(vectors, via=via))

View File

@ -10,4 +10,16 @@ __uri__ = 'https://spacy.io'
__author__ = 'Matthew Honnibal'
__email__ = 'matt@spacy.io'
__license__ = 'MIT'
__default_model__ = 'en>=1.0.0,<1.1.0'
# Registry of the models spaCy knows how to load and download, keyed by
# language ID. Each entry provides:
#   'module'  - sub-module of the spacy package that is imported for this
#               language; also used to build the 'spacy.<module>.download'
#               hint in error messages
#   'class'   - name of the attribute fetched from that module and called to
#               build the language object
#   'package' - package requirement string handed to sputnik when checking
#               for or installing the model
__models__ = {
'en': {
'module': 'en',
'class': 'English',
'package': 'en>=1.0.0,<1.1.0',
},
'de': {
'module': 'de',
'class': 'German',
'package': 'de>=1.0.0,<1.1.0',
},
}
# Key into __models__ (not a package requirement string) that is used when
# no model name is given.
__default_model__ = 'en'

13
spacy/de/download.py Normal file
View File

@ -0,0 +1,13 @@
import plac
from ..download import download
@plac.annotations(
    force=("Force overwrite", "flag", "f", bool),
)
def main(data_size='all', force=False):
    """Command-line entry point: download the German ('de') model.

    Args:
        data_size: Accepted but not used by this function.
        force: Forwarded to ``download``; set by the ``-f`` flag.
    """
    download('de', force=force)


if __name__ == '__main__':
    # Let plac build the argument parser from main's annotations.
    plac.call(main)

33
spacy/download.py Normal file
View File

@ -0,0 +1,33 @@
from __future__ import print_function
import sys
import sputnik
from sputnik.package_list import (PackageNotFoundException,
CompatiblePackageNotFoundException)
from . import about
def download(lang, force=False):
    """Install the model package registered for *lang* through sputnik.

    Progress and error messages are written to stderr. The process exits
    with status 1 when the model is already installed (and *force* is not
    set) or when the package cannot be found after installation.

    Args:
        lang: Key into ``about.__models__`` (e.g. ``'en'`` or ``'de'``).
        force: When true, call ``sputnik.purge`` for this spaCy
            title/version before installing.

    Raises:
        KeyError: If *lang* is not a key of ``about.__models__``.
    """
    # Resolve the package requirement before touching anything on disk, so
    # an unknown language fails here rather than after a forced purge.
    if lang not in about.__models__:
        raise KeyError("Unknown language %r; expected one of: %s"
                       % (lang, ', '.join(sorted(about.__models__))))
    package_spec = about.__models__[lang]['package']
    not_installed = (PackageNotFoundException,
                     CompatiblePackageNotFoundException)

    if force:
        sputnik.purge(about.__title__, about.__version__)

    try:
        sputnik.package(about.__title__, about.__version__, package_spec)
    except not_installed:
        pass  # expected: nothing compatible installed yet
    else:
        print("Model already installed. Please run 'python -m "
              "spacy.%s.download --force' to reinstall." % lang,
              file=sys.stderr)
        sys.exit(1)

    sputnik.install(about.__title__, about.__version__, package_spec)

    # Look the package up again to confirm the install actually succeeded.
    try:
        sputnik.package(about.__title__, about.__version__, package_spec)
    except not_installed:
        print("Model failed to install. Please run 'python -m "
              "spacy.%s.download --force'." % lang, file=sys.stderr)
        sys.exit(1)

    print("Model successfully installed.", file=sys.stderr)

View File

@ -1,57 +1,12 @@
from __future__ import print_function
import sys
import os
import shutil
import plac
import sputnik
from sputnik.package_list import (PackageNotFoundException,
CompatiblePackageNotFoundException)
from .. import about
def migrate(path):
    """Remove old-style model data found directly under *path*.

    Deletes ``<path>/data`` (unlinked when it is a symlink, removed as a
    tree when it is a real directory) and every ``*.tgz`` file located
    directly inside *path*.

    Args:
        path: Directory to clean up; it must exist.
    """
    data_path = os.path.join(path, 'data')
    # Check for a symlink first: os.path.isdir() follows links, so a
    # dangling ``data`` link would otherwise be skipped and left behind.
    if os.path.islink(data_path):
        os.unlink(data_path)
    elif os.path.isdir(data_path):
        shutil.rmtree(data_path)

    for filename in os.listdir(path):
        if filename.endswith('.tgz'):
            os.unlink(os.path.join(path, filename))
from ..download import download
@plac.annotations(
    force=("Force overwrite", "flag", "f", bool),
)
def main(data_size='all', force=False):
    """Command-line entry point: download the English ('en') model.

    All install logic lives in the shared ``download`` helper; running it
    inline here as well would make the subsequent ``download`` call find the
    model already installed and exit with status 1.

    Args:
        data_size: Accepted but not used by this function.
        force: Forwarded to ``download``; set by the ``-f`` flag.
    """
    download('en', force)
if __name__ == '__main__':

View File

@ -23,15 +23,17 @@ def get_package(data_dir):
def get_package_by_name(name=None, via=None):
    """Return the installed sputnik package for *name*.

    Args:
        name: Package name or requirement string. When falsy, the
            ``'package'`` entry of the default model
            (``about.__models__[about.__default_model__]``) is used.
        via: Optional data path, forwarded to sputnik as ``data_path``.

    Returns:
        Whatever ``sputnik.package`` returns for the resolved package.

    Raises:
        RuntimeError: If the package is not installed, or the installed
            package is not compatible with this spaCy version. The message
            names the download command to run.
    """
    default_model = about.__models__[about.__default_model__]
    package_spec = name or default_model['package']
    # `name` may be None or a requirement string that is not a key of
    # about.__models__; indexing the registry with it directly would raise
    # KeyError inside the handlers below and hide the real error. Fall back
    # to the default model's module for the download hint.
    module = about.__models__.get(name, default_model)['module']

    try:
        return sputnik.package(about.__title__, about.__version__,
                               package_spec, data_path=via)
    except PackageNotFoundException:
        raise RuntimeError("Model %s not installed. Please run 'python -m "
                           "spacy.%s.download' to install latest compatible "
                           "model." % (package_spec, module))
    except CompatiblePackageNotFoundException:
        raise RuntimeError("Installed model %s is not compatible with spaCy "
                           "version. Please run 'python -m spacy.%s.download "
                           "--force' to install latest compatible model." %
                           (package_spec, module))
def normalize_slice(length, start, stop, step=None):