mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-11 17:56:30 +03:00
Merge pull request #642 from ExplodingCabbage/specify-data-path
Let --data-path be specified when running download.py scripts
This commit is contained in:
commit
a7b5fba132
16
README.rst
16
README.rst
|
@ -222,6 +222,22 @@ and ``--model`` are optional and enable additional tests:
|
||||||
|
|
||||||
python -m pytest <spacy-directory> --vectors --model --slow
|
python -m pytest <spacy-directory> --vectors --model --slow
|
||||||
|
|
||||||
|
Download model to custom location
|
||||||
|
=================================
|
||||||
|
|
||||||
|
You can choose the directory into which ``spacy.en.download`` and ``spacy.de.download`` download
|
||||||
|
the language model by passing the ``--data-path`` or ``-d`` argument:
|
||||||
|
|
||||||
|
.. code:: bash
|
||||||
|
|
||||||
|
python -m spacy.en.download all --data-path /some/dir
|
||||||
|
|
||||||
|
|
||||||
|
If you choose to download to a custom location, you will need to tell spaCy where to load the model
|
||||||
|
from in order to use it. You can do this either by calling ``spacy.util.set_data_path()`` before
|
||||||
|
calling ``spacy.load()``, or by passing a ``path`` argument to the ``spacy.en.English`` or
|
||||||
|
``spacy.de.German`` constructors.
|
||||||
|
|
||||||
Changelog
|
Changelog
|
||||||
=========
|
=========
|
||||||
|
|
||||||
|
|
|
@ -4,9 +4,10 @@ from ..download import download
|
||||||
|
|
||||||
@plac.annotations(
|
@plac.annotations(
|
||||||
force=("Force overwrite", "flag", "f", bool),
|
force=("Force overwrite", "flag", "f", bool),
|
||||||
|
data_path=("Path to download model", "option", "d", str)
|
||||||
)
|
)
|
||||||
def main(data_size='all', force=False):
|
def main(data_size='all', force=False, data_path=None):
|
||||||
download('de', force)
|
download('de', force=force, data_path=data_path)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
|
@ -10,10 +10,19 @@ from . import about
|
||||||
from . import util
|
from . import util
|
||||||
|
|
||||||
|
|
||||||
def download(lang, force=False, fail_on_exist=True):
|
def download(lang, force=False, fail_on_exist=True, data_path=None):
|
||||||
|
if not data_path:
|
||||||
|
data_path = util.get_data_path()
|
||||||
|
|
||||||
|
# spaCy uses pathlib, and util.get_data_path returns a pathlib.Path object,
|
||||||
|
# but sputnik (which we're using below) doesn't use pathlib and requires
|
||||||
|
# its data_path parameters to be strings, so we coerce the data_path to a
|
||||||
|
# str here.
|
||||||
|
data_path = str(data_path)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
pkg = sputnik.package(about.__title__, about.__version__,
|
pkg = sputnik.package(about.__title__, about.__version__,
|
||||||
about.__models__.get(lang, lang))
|
about.__models__.get(lang, lang), data_path)
|
||||||
if force:
|
if force:
|
||||||
shutil.rmtree(pkg.path)
|
shutil.rmtree(pkg.path)
|
||||||
elif fail_on_exist:
|
elif fail_on_exist:
|
||||||
|
@ -24,15 +33,14 @@ def download(lang, force=False, fail_on_exist=True):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
package = sputnik.install(about.__title__, about.__version__,
|
package = sputnik.install(about.__title__, about.__version__,
|
||||||
about.__models__.get(lang, lang))
|
about.__models__.get(lang, lang), data_path)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
sputnik.package(about.__title__, about.__version__,
|
sputnik.package(about.__title__, about.__version__,
|
||||||
about.__models__.get(lang, lang))
|
about.__models__.get(lang, lang), data_path)
|
||||||
except (PackageNotFoundException, CompatiblePackageNotFoundException):
|
except (PackageNotFoundException, CompatiblePackageNotFoundException):
|
||||||
print("Model failed to install. Please run 'python -m "
|
print("Model failed to install. Please run 'python -m "
|
||||||
"spacy.%s.download --force'." % lang, file=sys.stderr)
|
"spacy.%s.download --force'." % lang, file=sys.stderr)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
data_path = util.get_data_path()
|
|
||||||
print("Model successfully installed to %s" % data_path, file=sys.stderr)
|
print("Model successfully installed to %s" % data_path, file=sys.stderr)
|
||||||
|
|
|
@ -7,17 +7,18 @@ from .. import about
|
||||||
|
|
||||||
@plac.annotations(
|
@plac.annotations(
|
||||||
force=("Force overwrite", "flag", "f", bool),
|
force=("Force overwrite", "flag", "f", bool),
|
||||||
|
data_path=("Path to download model", "option", "d", str)
|
||||||
)
|
)
|
||||||
def main(data_size='all', force=False):
|
def main(data_size='all', force=False, data_path=None):
|
||||||
if force:
|
if force:
|
||||||
sputnik.purge(about.__title__, about.__version__)
|
sputnik.purge(about.__title__, about.__version__)
|
||||||
|
|
||||||
if data_size in ('all', 'parser'):
|
if data_size in ('all', 'parser'):
|
||||||
print("Downloading parsing model")
|
print("Downloading parsing model")
|
||||||
download('en', False)
|
download('en', force=False, data_path=data_path)
|
||||||
if data_size in ('all', 'glove'):
|
if data_size in ('all', 'glove'):
|
||||||
print("Downloading GloVe vectors")
|
print("Downloading GloVe vectors")
|
||||||
download('en_glove_cc_300_1m_vectors', False)
|
download('en_glove_cc_300_1m_vectors', force=False, data_path=data_path)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
|
@ -246,9 +246,8 @@ class Language(object):
|
||||||
self.end_training()
|
self.end_training()
|
||||||
|
|
||||||
def __init__(self, path=True, **overrides):
|
def __init__(self, path=True, **overrides):
|
||||||
if 'data_dir' in overrides and 'path' not in overrides:
|
if 'data_dir' in overrides and path is True:
|
||||||
raise ValueError("The argument 'data_dir' has been renamed to 'path'")
|
raise ValueError("The argument 'data_dir' has been renamed to 'path'")
|
||||||
path = overrides.get('path', True)
|
|
||||||
if isinstance(path, basestring):
|
if isinstance(path, basestring):
|
||||||
path = pathlib.Path(path)
|
path = pathlib.Path(path)
|
||||||
if path is True:
|
if path is True:
|
||||||
|
|
|
@ -114,3 +114,20 @@ p
|
||||||
python -m pip install -U pytest
|
python -m pip install -U pytest
|
||||||
|
|
||||||
python -m pytest <spacy-directory> --vectors --model --slow
|
python -m pytest <spacy-directory> --vectors --model --slow
|
||||||
|
|
||||||
|
+h(2, "custom-location") Download model to custom location
|
||||||
|
|
||||||
|
p
|
||||||
|
| You can choose the directory into which #[code spacy.en.download] and
|
||||||
|
| #[code spacy.de.download] download the language model by passing the
|
||||||
|
| #[code --data-path] or #[code -d] argument:
|
||||||
|
|
||||||
|
+code(false, "bash").
|
||||||
|
python -m spacy.en.download all --data-path /some/dir
|
||||||
|
|
||||||
|
p
|
||||||
|
| If you choose to download to a custom location, you will need to tell
|
||||||
|
| spaCy where to load the model from in order to use it. You can do this
|
||||||
|
| either by calling #[code spacy.util.set_data_path()] before calling
|
||||||
|
| #[code spacy.load()], or by passing a #[code path] argument to the
|
||||||
|
| #[code spacy.en.English] or #[code spacy.de.German] constructors.
|
||||||
|
|
Loading…
Reference in New Issue
Block a user