diff --git a/README.rst b/README.rst index e1432463b..bac46dec4 100644 --- a/README.rst +++ b/README.rst @@ -222,6 +222,22 @@ and ``--model`` are optional and enable additional tests: python -m pytest --vectors --model --slow +Download model to custom location +================================= + +You can specify where ``spacy.en.download`` and ``spacy.de.download`` download the language model +to using the ``--data-path`` or ``-d`` argument: + +.. code:: bash + + python -m spacy.en.download all --data-path /some/dir + + +If you choose to download to a custom location, you will need to tell spaCy where to load the model +from in order to use it. You can do this either by calling ``spacy.util.set_data_path()`` before +calling ``spacy.load()``, or by passing a ``path`` argument to the ``spacy.en.English`` or +``spacy.de.German`` constructors. + Changelog ========= diff --git a/spacy/de/download.py b/spacy/de/download.py index ba57c1d31..4f02f0474 100644 --- a/spacy/de/download.py +++ b/spacy/de/download.py @@ -4,9 +4,10 @@ from ..download import download @plac.annotations( force=("Force overwrite", "flag", "f", bool), + data_path=("Path to download model", "option", "d", str) ) -def main(data_size='all', force=False): - download('de', force) +def main(data_size='all', force=False, data_path=None): + download('de', force=force, data_path=data_path) if __name__ == '__main__': diff --git a/spacy/download.py b/spacy/download.py index 779a15f1e..9b2d65ffc 100644 --- a/spacy/download.py +++ b/spacy/download.py @@ -10,10 +10,19 @@ from . import about from . import util -def download(lang, force=False, fail_on_exist=True): +def download(lang, force=False, fail_on_exist=True, data_path=None): + if not data_path: + data_path = util.get_data_path() + + # spaCy uses pathlib, and util.get_data_path returns a pathlib.Path object, + # but sputnik (which we're using below) doesn't use pathlib and requires + # its data_path parameters to be strings, so we coerce the data_path to a + # str here. + data_path = str(data_path) + try: pkg = sputnik.package(about.__title__, about.__version__, - about.__models__.get(lang, lang)) + about.__models__.get(lang, lang), data_path) if force: shutil.rmtree(pkg.path) elif fail_on_exist: @@ -24,15 +33,14 @@ def download(lang, force=False, fail_on_exist=True): pass package = sputnik.install(about.__title__, about.__version__, - about.__models__.get(lang, lang)) + about.__models__.get(lang, lang), data_path) try: sputnik.package(about.__title__, about.__version__, - about.__models__.get(lang, lang)) + about.__models__.get(lang, lang), data_path) except (PackageNotFoundException, CompatiblePackageNotFoundException): print("Model failed to install. Please run 'python -m " "spacy.%s.download --force'." % lang, file=sys.stderr) sys.exit(1) - data_path = util.get_data_path() print("Model successfully installed to %s" % data_path, file=sys.stderr) diff --git a/spacy/en/download.py b/spacy/en/download.py index bf3815769..7a2d58234 100644 --- a/spacy/en/download.py +++ b/spacy/en/download.py @@ -7,17 +7,18 @@ from .. import about @plac.annotations( force=("Force overwrite", "flag", "f", bool), + data_path=("Path to download model", "option", "d", str) ) -def main(data_size='all', force=False): +def main(data_size='all', force=False, data_path=None): if force: sputnik.purge(about.__title__, about.__version__) if data_size in ('all', 'parser'): print("Downloading parsing model") - download('en', False) + download('en', force=False, data_path=data_path) if data_size in ('all', 'glove'): print("Downloading GloVe vectors") - download('en_glove_cc_300_1m_vectors', False) + download('en_glove_cc_300_1m_vectors', force=False, data_path=data_path) if __name__ == '__main__': diff --git a/spacy/language.py b/spacy/language.py index 4681583c0..82d9b6d2c 100644 --- a/spacy/language.py +++ b/spacy/language.py @@ -246,9 +246,8 @@ class Language(object): self.end_training() def __init__(self, path=True, **overrides): - if 'data_dir' in overrides and 'path' not in overrides: + if 'data_dir' in overrides and path is True: raise ValueError("The argument 'data_dir' has been renamed to 'path'") - path = overrides.get('path', True) if isinstance(path, basestring): path = pathlib.Path(path) if path is True: diff --git a/website/docs/usage/index.jade b/website/docs/usage/index.jade index 75ae641ae..1c142592d 100644 --- a/website/docs/usage/index.jade +++ b/website/docs/usage/index.jade @@ -114,3 +114,20 @@ p python -m pip install -U pytest python -m pytest <spacy-directory> --vectors --model --slow + ++h(2, "custom-location") Download model to custom location + +p + | You can specify where #[code spacy.en.download] and + | #[code spacy.de.download] download the language model to using the + | #[code --data-path] or #[code -d] argument: + ++code(false, "bash"). + python -m spacy.en.download all --data-path /some/dir + +p + | If you choose to download to a custom location, you will need to tell + | spaCy where to load the model from in order to use it. You can do this + | either by calling #[code spacy.util.set_data_path()] before calling + | #[code spacy.load()], or by passing a #[code path] argument to the + | #[code spacy.en.English] or #[code spacy.de.German] constructors.