mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 09:57:26 +03:00 
			
		
		
		
	Merge pull request #642 from ExplodingCabbage/specify-data-path
Let --data-path be specified when running download.py scripts
This commit is contained in:
		
						commit
						a7b5fba132
					
				
							
								
								
									
										16
									
								
								README.rst
									
									
									
									
									
								
							
							
						
						
									
										16
									
								
								README.rst
									
									
									
									
									
								
							| 
						 | 
					@ -222,6 +222,22 @@ and ``--model`` are optional and enable additional tests:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    python -m pytest <spacy-directory> --vectors --model --slow
 | 
					    python -m pytest <spacy-directory> --vectors --model --slow
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Download model to custom location
 | 
				
			||||||
 | 
					=================================
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					You can specify where ``spacy.en.download`` and ``spacy.de.download`` download the language model
 | 
				
			||||||
 | 
					to using the ``--data-path`` or ``-d`` argument:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					.. code:: bash
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    python -m spacy.en.download all --data-path /some/dir
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					If you choose to download to a custom location, you will need to tell spaCy where to load the model
 | 
				
			||||||
 | 
					from in order to use it. You can do this either by calling ``spacy.util.set_data_path()`` before
 | 
				
			||||||
 | 
					calling ``spacy.load()``, or by passing a ``path`` argument to the ``spacy.en.English`` or
 | 
				
			||||||
 | 
					``spacy.de.German`` constructors.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Changelog
 | 
					Changelog
 | 
				
			||||||
=========
 | 
					=========
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -4,9 +4,10 @@ from ..download import download
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@plac.annotations(
 | 
					@plac.annotations(
 | 
				
			||||||
    force=("Force overwrite", "flag", "f", bool),
 | 
					    force=("Force overwrite", "flag", "f", bool),
 | 
				
			||||||
 | 
					    data_path=("Path to download model", "option", "d", str)
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
def main(data_size='all', force=False):
 | 
					def main(data_size='all', force=False, data_path=None):
 | 
				
			||||||
    download('de', force)
 | 
					    download('de', force=force, data_path=data_path)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
if __name__ == '__main__':
 | 
					if __name__ == '__main__':
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -10,10 +10,19 @@ from . import about
 | 
				
			||||||
from . import util
 | 
					from . import util
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def download(lang, force=False, fail_on_exist=True):
 | 
					def download(lang, force=False, fail_on_exist=True, data_path=None):
 | 
				
			||||||
 | 
					    if not data_path:
 | 
				
			||||||
 | 
					        data_path = util.get_data_path()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # spaCy uses pathlib, and util.get_data_path returns a pathlib.Path object,
 | 
				
			||||||
 | 
					    # but sputnik (which we're using below) doesn't use pathlib and requires
 | 
				
			||||||
 | 
					    # its data_path parameters to be strings, so we coerce the data_path to a
 | 
				
			||||||
 | 
					    # str here.
 | 
				
			||||||
 | 
					    data_path = str(data_path)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    try:
 | 
					    try:
 | 
				
			||||||
        pkg = sputnik.package(about.__title__, about.__version__,
 | 
					        pkg = sputnik.package(about.__title__, about.__version__,
 | 
				
			||||||
                        about.__models__.get(lang, lang))
 | 
					                        about.__models__.get(lang, lang), data_path)
 | 
				
			||||||
        if force:
 | 
					        if force:
 | 
				
			||||||
            shutil.rmtree(pkg.path)
 | 
					            shutil.rmtree(pkg.path)
 | 
				
			||||||
        elif fail_on_exist:
 | 
					        elif fail_on_exist:
 | 
				
			||||||
| 
						 | 
					@ -24,15 +33,14 @@ def download(lang, force=False, fail_on_exist=True):
 | 
				
			||||||
        pass
 | 
					        pass
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    package = sputnik.install(about.__title__, about.__version__,
 | 
					    package = sputnik.install(about.__title__, about.__version__,
 | 
				
			||||||
                              about.__models__.get(lang, lang))
 | 
					                              about.__models__.get(lang, lang), data_path)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    try:
 | 
					    try:
 | 
				
			||||||
        sputnik.package(about.__title__, about.__version__,
 | 
					        sputnik.package(about.__title__, about.__version__,
 | 
				
			||||||
                        about.__models__.get(lang, lang))
 | 
					                        about.__models__.get(lang, lang), data_path)
 | 
				
			||||||
    except (PackageNotFoundException, CompatiblePackageNotFoundException):
 | 
					    except (PackageNotFoundException, CompatiblePackageNotFoundException):
 | 
				
			||||||
        print("Model failed to install. Please run 'python -m "
 | 
					        print("Model failed to install. Please run 'python -m "
 | 
				
			||||||
              "spacy.%s.download --force'." % lang, file=sys.stderr)
 | 
					              "spacy.%s.download --force'." % lang, file=sys.stderr)
 | 
				
			||||||
        sys.exit(1)
 | 
					        sys.exit(1)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    data_path = util.get_data_path()
 | 
					 | 
				
			||||||
    print("Model successfully installed to %s" % data_path, file=sys.stderr)
 | 
					    print("Model successfully installed to %s" % data_path, file=sys.stderr)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -7,17 +7,18 @@ from .. import about
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@plac.annotations(
 | 
					@plac.annotations(
 | 
				
			||||||
    force=("Force overwrite", "flag", "f", bool),
 | 
					    force=("Force overwrite", "flag", "f", bool),
 | 
				
			||||||
 | 
					    data_path=("Path to download model", "option", "d", str)
 | 
				
			||||||
)
 | 
					)
 | 
				
			||||||
def main(data_size='all', force=False):
 | 
					def main(data_size='all', force=False, data_path=None):
 | 
				
			||||||
    if force:
 | 
					    if force:
 | 
				
			||||||
        sputnik.purge(about.__title__, about.__version__)
 | 
					        sputnik.purge(about.__title__, about.__version__)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    if data_size in ('all', 'parser'):
 | 
					    if data_size in ('all', 'parser'):
 | 
				
			||||||
        print("Downloading parsing model")
 | 
					        print("Downloading parsing model")
 | 
				
			||||||
        download('en', False)
 | 
					        download('en', force=False, data_path=data_path)
 | 
				
			||||||
    if data_size in ('all', 'glove'):
 | 
					    if data_size in ('all', 'glove'):
 | 
				
			||||||
        print("Downloading GloVe vectors")
 | 
					        print("Downloading GloVe vectors")
 | 
				
			||||||
        download('en_glove_cc_300_1m_vectors', False)
 | 
					        download('en_glove_cc_300_1m_vectors', force=False, data_path=data_path)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
if __name__ == '__main__':
 | 
					if __name__ == '__main__':
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -246,9 +246,8 @@ class Language(object):
 | 
				
			||||||
        self.end_training()
 | 
					        self.end_training()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def __init__(self, path=True, **overrides):
 | 
					    def __init__(self, path=True, **overrides):
 | 
				
			||||||
        if 'data_dir' in overrides and 'path' not in overrides:
 | 
					        if 'data_dir' in overrides and path is True:
 | 
				
			||||||
            raise ValueError("The argument 'data_dir' has been renamed to 'path'")
 | 
					            raise ValueError("The argument 'data_dir' has been renamed to 'path'")
 | 
				
			||||||
        path = overrides.get('path', True)
 | 
					 | 
				
			||||||
        if isinstance(path, basestring):
 | 
					        if isinstance(path, basestring):
 | 
				
			||||||
            path = pathlib.Path(path)
 | 
					            path = pathlib.Path(path)
 | 
				
			||||||
        if path is True:
 | 
					        if path is True:
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -114,3 +114,20 @@ p
 | 
				
			||||||
    python -m pip install -U pytest
 | 
					    python -m pip install -U pytest
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    python -m pytest <spacy-directory> --vectors --model --slow
 | 
					    python -m pytest <spacy-directory> --vectors --model --slow
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					+h(2, "custom-location") Download model to custom location
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					p
 | 
				
			||||||
 | 
					    |   You can specify where #[code spacy.en.download] and
 | 
				
			||||||
 | 
					    |  #[code spacy.de.download] download the language model to using the
 | 
				
			||||||
 | 
					    |  #[code --data-path] or #[code -d] argument:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					+code(false, "bash").
 | 
				
			||||||
 | 
					    python -m spacy.en.download all --data-path /some/dir
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					p
 | 
				
			||||||
 | 
					    |  If you choose to download to a custom location, you will need to tell
 | 
				
			||||||
 | 
					    |  spaCy where to load the model from in order to use it. You can do this
 | 
				
			||||||
 | 
					    |  either by calling #[code spacy.util.set_data_path()] before calling
 | 
				
			||||||
 | 
					    |  #[code spacy.load()], or by passing a #[code path] argument to the
 | 
				
			||||||
 | 
					    |  #[code spacy.en.English] or #[code spacy.de.German] constructors.
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user