mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 01:48:04 +03:00 
			
		
		
		
	Refactor download script and about.py to use new download method
This commit is contained in:
		
							parent
							
								
									f5d1a39a5b
								
							
						
					
					
						commit
						58b884b6d4
					
				| 
						 | 
					@ -1,5 +1,4 @@
 | 
				
			||||||
# inspired from:
 | 
					# inspired from:
 | 
				
			||||||
 | 
					 | 
				
			||||||
# https://python-packaging-user-guide.readthedocs.org/en/latest/single_source_version/
 | 
					# https://python-packaging-user-guide.readthedocs.org/en/latest/single_source_version/
 | 
				
			||||||
# https://github.com/pypa/warehouse/blob/master/warehouse/__about__.py
 | 
					# https://github.com/pypa/warehouse/blob/master/warehouse/__about__.py
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -10,7 +9,8 @@ __uri__ = 'https://spacy.io'
 | 
				
			||||||
__author__ = 'Matthew Honnibal'
 | 
					__author__ = 'Matthew Honnibal'
 | 
				
			||||||
__email__ = 'matt@explosion.ai'
 | 
					__email__ = 'matt@explosion.ai'
 | 
				
			||||||
__license__ = 'MIT'
 | 
					__license__ = 'MIT'
 | 
				
			||||||
__models__ = {
 | 
					
 | 
				
			||||||
    'en': 'en>=1.1.0,<1.2.0',
 | 
					__docs__ = 'https://spacy.io/docs/usage'
 | 
				
			||||||
    'de': 'de>=1.0.0,<1.1.0',
 | 
					__download_url__ = 'https://github.com/explosion/spacy-models/releases/download'
 | 
				
			||||||
}
 | 
					__compatibility__ = 'https://raw.githubusercontent.com/explosion/spacy-models/master/compatibility.json?token=ANAt54fi5zcUtnwGhMLw2klWwcAyHkZGks5Y0nw1wA%3D%3D'
 | 
				
			||||||
 | 
					__shortcuts__ = {'en': 'en_core_web_md', 'de': 'de_core_web_md', 'vectors': 'en_vectors_glove_md'}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,47 +1,79 @@
 | 
				
			||||||
from __future__ import print_function
 | 
					# coding: utf8
 | 
				
			||||||
 | 
					from __future__ import unicode_literals
 | 
				
			||||||
 | 
					
 | 
				
			||||||
import sys
 | 
					import sys
 | 
				
			||||||
import shutil
 | 
					import pip
 | 
				
			||||||
 | 
					import plac
 | 
				
			||||||
import sputnik
 | 
					import requests
 | 
				
			||||||
from sputnik.package_list import (PackageNotFoundException,
 | 
					from os import path
 | 
				
			||||||
                                  CompatiblePackageNotFoundException)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
from . import about
 | 
					from . import about
 | 
				
			||||||
from . import util
 | 
					from . import util
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def download(lang, force=False, fail_on_exist=True, data_path=None):
 | 
					@plac.annotations(
 | 
				
			||||||
    if not data_path:
 | 
					    model=("Model to download", "positional", None, str),
 | 
				
			||||||
        data_path = util.get_data_path(require_exists=False)
 | 
					    direct=("Force direct download", "flag", "d", bool)
 | 
				
			||||||
 | 
					)
 | 
				
			||||||
 | 
					def download(model=None, direct=False):
 | 
				
			||||||
 | 
					    check_error_depr(model)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # spaCy uses pathlib, and util.get_data_path returns a pathlib.Path object,
 | 
					    if direct:
 | 
				
			||||||
    # but sputnik (which we're using below) doesn't use pathlib and requires
 | 
					        download_model('{m}/{m}.tar.gz'.format(m=model))
 | 
				
			||||||
    # its data_path parameters to be strings, so we coerce the data_path to a
 | 
					    else:
 | 
				
			||||||
    # str here.
 | 
					        model = about.__shortcuts__[model] if model in about.__shortcuts__ else model
 | 
				
			||||||
    data_path = str(data_path)
 | 
					        compatibility = get_compatibility()
 | 
				
			||||||
 | 
					        version = get_version(model, compatibility)
 | 
				
			||||||
 | 
					        download_model('{m}-{v}/{m}-{v}.tar.gz'.format(m=model, v=version))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    try:
 | 
					
 | 
				
			||||||
        pkg = sputnik.package(about.__title__, about.__version__,
 | 
					def get_compatibility():
 | 
				
			||||||
                        about.__models__.get(lang, lang), data_path)
 | 
					    version = about.__version__
 | 
				
			||||||
        if force:
 | 
					    r = requests.get(about.__compatibility__)
 | 
				
			||||||
            shutil.rmtree(pkg.path)
 | 
					    if r.status_code != 200:
 | 
				
			||||||
        elif fail_on_exist:
 | 
					        exit("Couldn't fetch compatibility table. Please find the right model for "
 | 
				
			||||||
            print("Model already installed. Please run 'python -m "
 | 
					             "your spaCy installation (v{v}), and download it manually:".format(v=version),
 | 
				
			||||||
                  "spacy.%s.download --force' to reinstall." % lang, file=sys.stderr)
 | 
					             "python -m spacy.download [full model name + version] --direct",
 | 
				
			||||||
 | 
					             title="Server error ({c})".format(c=r.status_code))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    comp = r.json()['spacy']
 | 
				
			||||||
 | 
					    if version not in comp:
 | 
				
			||||||
 | 
					        exit("No compatible models found for v{v} of spaCy.".format(v=version),
 | 
				
			||||||
 | 
					             title="Compatibility error")
 | 
				
			||||||
 | 
					    else:
 | 
				
			||||||
 | 
					        return comp[version]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def get_version(model, comp):
 | 
				
			||||||
 | 
					    if model not in comp:
 | 
				
			||||||
 | 
					        exit("No compatible model found for "
 | 
				
			||||||
 | 
					             "{m} (spaCy v{v}).".format(m=model, v=about.__version__),
 | 
				
			||||||
 | 
					             title="Compatibility error")
 | 
				
			||||||
 | 
					    return comp[model][0]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def download_model(filename):
 | 
				
			||||||
 | 
					    util.print_msg("Downloading {f}".format(f=filename))
 | 
				
			||||||
 | 
					    download_url = path.join(about.__download_url__, filename)
 | 
				
			||||||
 | 
					    pip.main(['install', download_url])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def check_error_depr(model):
 | 
				
			||||||
 | 
					    if not model:
 | 
				
			||||||
 | 
					        exit("python -m spacy.download [name or shortcut]",
 | 
				
			||||||
 | 
					             title="Missing model name or shortcut")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if model == 'all':
 | 
				
			||||||
 | 
					        exit("As of v1.7.0, the download all command is deprecated. Please "
 | 
				
			||||||
 | 
					             "download the models individually via spacy.download [model name] "
 | 
				
			||||||
 | 
					             "or pip install. For more info on this, see the "
 | 
				
			||||||
 | 
					             "documentation: {d}".format(d=about.__docs__),
 | 
				
			||||||
 | 
					             title="Deprecated command")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def exit(*messages, **kwargs):
 | 
				
			||||||
 | 
					    util.print_msg(*messages, **kwargs)
 | 
				
			||||||
    sys.exit(0)
 | 
					    sys.exit(0)
 | 
				
			||||||
    except (PackageNotFoundException, CompatiblePackageNotFoundException):
 | 
					 | 
				
			||||||
        pass
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    package = sputnik.install(about.__title__, about.__version__,
 | 
					 | 
				
			||||||
                              about.__models__.get(lang, lang), data_path)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    try:
 | 
					if __name__ == '__main__':
 | 
				
			||||||
        sputnik.package(about.__title__, about.__version__,
 | 
					    plac.call(download)
 | 
				
			||||||
                        about.__models__.get(lang, lang), data_path)
 | 
					 | 
				
			||||||
    except (PackageNotFoundException, CompatiblePackageNotFoundException):
 | 
					 | 
				
			||||||
        print("Model failed to install. Please run 'python -m "
 | 
					 | 
				
			||||||
              "spacy.%s.download --force'." % lang, file=sys.stderr)
 | 
					 | 
				
			||||||
        sys.exit(1)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    print("Model successfully installed to %s" % data_path, file=sys.stderr)
 | 
					 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user