From a7d7ea3afa776132d5f46f2f1b59a4deeda1748c Mon Sep 17 00:00:00 2001
From: Henning Peters
Date: Thu, 24 Mar 2016 11:19:43 +0100
Subject: [PATCH] first idea for supporting multiple langs in download script

---
Review notes (text between the "---" line and the diffstat is commentary and
is not part of the commit message):

 * The line structure of this patch was restored from a whitespace-collapsed
   copy. Hunk line counts were re-checked against the "@@" headers, but
   indentation and continuation-line alignment are reconstructed; if a hunk
   is rejected, retry with `git apply --ignore-whitespace`.
 * spacy/util.py: get_package_by_name() looked up about.__models__[name]
   inside both error handlers. `name` defaults to None, and load() in
   spacy/__init__.py also passes `vectors` (default None) through this
   function, so a missing or incompatible package raised KeyError instead
   of the RuntimeError carrying the install instructions. The hunk now
   resolves the language entry before the try block and falls back to
   __default_model__, and the messages report the package that was
   actually requested from sputnik.
 * The blob id after ".." on the spacy/util.py "index" line is the one from
   the original commit and predates the amendment above.

 spacy/__init__.py    |  9 ++++++--
 spacy/about.py       | 14 ++++++++++++-
 spacy/de/download.py | 13 ++++++++++++
 spacy/download.py    | 33 +++++++++++++++++++++++++++++
 spacy/en/download.py | 49 ++------------------------------------------
 spacy/util.py        | 19 +++++++++++------
 6 files changed, 81 insertions(+), 56 deletions(-)
 create mode 100644 spacy/de/download.py
 create mode 100644 spacy/download.py

diff --git a/spacy/__init__.py b/spacy/__init__.py
index 70e72b7a1..b09ee3491 100644
--- a/spacy/__init__.py
+++ b/spacy/__init__.py
@@ -1,8 +1,13 @@
 from . import util
-from .en import English
+from .about import __models__
+import importlib
 
 
 def load(name, vectors=None, via=None):
-    return English(
+    if name not in __models__:
+        raise Exception('Model %s not found.' % name)
+
+    mod = importlib.import_module('.%s' % __models__[name]['module'], 'spacy')
+    return getattr(mod, __models__[name]['class'])(
         package=util.get_package_by_name(name, via=via),
         vectors_package=util.get_package_by_name(vectors, via=via))
diff --git a/spacy/about.py b/spacy/about.py
index 3814b8d61..eed7c3f81 100644
--- a/spacy/about.py
+++ b/spacy/about.py
@@ -10,4 +10,16 @@ __uri__ = 'https://spacy.io'
 __author__ = 'Matthew Honnibal'
 __email__ = 'matt@spacy.io'
 __license__ = 'MIT'
-__default_model__ = 'en>=1.0.0,<1.1.0'
+__models__ = {
+    'en': {
+        'module': 'en',
+        'class': 'English',
+        'package': 'en>=1.0.0,<1.1.0',
+    },
+    'de': {
+        'module': 'de',
+        'class': 'German',
+        'package': 'de>=1.0.0,<1.1.0',
+    },
+}
+__default_model__ = 'en'
diff --git a/spacy/de/download.py b/spacy/de/download.py
new file mode 100644
index 000000000..ba57c1d31
--- /dev/null
+++ b/spacy/de/download.py
@@ -0,0 +1,13 @@
+import plac
+from ..download import download
+
+
+@plac.annotations(
+    force=("Force overwrite", "flag", "f", bool),
+)
+def main(data_size='all', force=False):
+    download('de', force)
+
+
+if __name__ == '__main__':
+    plac.call(main)
diff --git a/spacy/download.py b/spacy/download.py
new file mode 100644
index 000000000..537c06872
--- /dev/null
+++ b/spacy/download.py
@@ -0,0 +1,33 @@
+from __future__ import print_function
+
+import sys
+
+import sputnik
+from sputnik.package_list import (PackageNotFoundException,
+                                  CompatiblePackageNotFoundException)
+
+from . import about
+
+
+def download(lang, force=False):
+    if force:
+        sputnik.purge(about.__title__, about.__version__)
+
+    try:
+        sputnik.package(about.__title__, about.__version__, about.__models__[lang]['package'])
+        print("Model already installed. Please run 'python -m "
+              "spacy.%s.download --force' to reinstall." % lang, file=sys.stderr)
+        sys.exit(1)
+    except (PackageNotFoundException, CompatiblePackageNotFoundException):
+        pass
+
+    package = sputnik.install(about.__title__, about.__version__, about.__models__[lang]['package'])
+
+    try:
+        sputnik.package(about.__title__, about.__version__, about.__models__[lang]['package'])
+    except (PackageNotFoundException, CompatiblePackageNotFoundException):
+        print("Model failed to install. Please run 'python -m "
+              "spacy.%s.download --force'." % lang, file=sys.stderr)
+        sys.exit(1)
+
+    print("Model successfully installed.", file=sys.stderr)
diff --git a/spacy/en/download.py b/spacy/en/download.py
index 993b8b16d..f0c23b088 100644
--- a/spacy/en/download.py
+++ b/spacy/en/download.py
@@ -1,57 +1,12 @@
-from __future__ import print_function
-
-import sys
-import os
-import shutil
-
 import plac
-import sputnik
-from sputnik.package_list import (PackageNotFoundException,
-                                  CompatiblePackageNotFoundException)
-
-from .. import about
-
-
-def migrate(path):
-    data_path = os.path.join(path, 'data')
-    if os.path.isdir(data_path):
-        if os.path.islink(data_path):
-            os.unlink(data_path)
-        else:
-            shutil.rmtree(data_path)
-    for filename in os.listdir(path):
-        if filename.endswith('.tgz'):
-            os.unlink(os.path.join(path, filename))
+from ..download import download
 
 
 @plac.annotations(
     force=("Force overwrite", "flag", "f", bool),
 )
 def main(data_size='all', force=False):
-    if force:
-        sputnik.purge(about.__title__, about.__version__)
-
-    try:
-        sputnik.package(about.__title__, about.__version__, about.__default_model__)
-        print("Model already installed. Please run 'python -m "
-              "spacy.en.download --force' to reinstall.", file=sys.stderr)
-        sys.exit(1)
-    except (PackageNotFoundException, CompatiblePackageNotFoundException):
-        pass
-
-    package = sputnik.install(about.__title__, about.__version__, about.__default_model__)
-
-    try:
-        sputnik.package(about.__title__, about.__version__, about.__default_model__)
-    except (PackageNotFoundException, CompatiblePackageNotFoundException):
-        print("Model failed to install. Please run 'python -m "
-              "spacy.en.download --force'.", file=sys.stderr)
-        sys.exit(1)
-
-    # FIXME clean up old-style packages
-    migrate(os.path.dirname(os.path.abspath(__file__)))
-
-    print("Model successfully installed.", file=sys.stderr)
+    download('en', force)
 
 
 if __name__ == '__main__':
diff --git a/spacy/util.py b/spacy/util.py
index bcc55c656..37d3b7bab 100644
--- a/spacy/util.py
+++ b/spacy/util.py
@@ -23,15 +23,22 @@ def get_package(data_dir):
 def get_package_by_name(name=None, via=None):
+    # `name` may be None (the default) or a package that is not a key of
+    # about.__models__ -- load() also passes `vectors` here -- so resolve the
+    # language entry up front; the error handlers must not raise KeyError
+    # while building their message.
+    lang = name if name in about.__models__ else about.__default_model__
+    package = name or about.__models__[lang]['package']
     try:
         return sputnik.package(about.__title__, about.__version__,
-            name or about.__default_model__, data_path=via)
+            package, data_path=via)
     except PackageNotFoundException as e:
         raise RuntimeError("Model %s not installed. Please run 'python -m "
-                           "spacy.en.download' to install latest compatible "
-                           "model." % name)
+                           "spacy.%s.download' to install latest compatible "
+                           "model." % (package, about.__models__[lang]['module']))
     except CompatiblePackageNotFoundException as e:
-        raise RuntimeError("Installed model is not compatible with spaCy "
-                           "version. Please run 'python -m spacy.en.download "
-                           "--force' to install latest compatible model.")
+        raise RuntimeError("Installed model %s is not compatible with spaCy "
+                           "version. Please run 'python -m spacy.%s.download "
+                           "--force' to install latest compatible model." %
+                           (package, about.__models__[lang]['module']))
 
 
 def normalize_slice(length, start, stop, step=None):