mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-04 09:57:26 +03:00 
			
		
		
		
	Merge pull request #5028 from explosion/refactor/remove-symlinks
Remove symlinks, data dir and related stuff
This commit is contained in:
		
						commit
						8137b24928
					
				| 
						 | 
					@ -1,6 +1,7 @@
 | 
				
			||||||
 | 
					from wasabi import msg
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from .download import download  # noqa: F401
 | 
					from .download import download  # noqa: F401
 | 
				
			||||||
from .info import info  # noqa: F401
 | 
					from .info import info  # noqa: F401
 | 
				
			||||||
from .link import link  # noqa: F401
 | 
					 | 
				
			||||||
from .package import package  # noqa: F401
 | 
					from .package import package  # noqa: F401
 | 
				
			||||||
from .profile import profile  # noqa: F401
 | 
					from .profile import profile  # noqa: F401
 | 
				
			||||||
from .train import train  # noqa: F401
 | 
					from .train import train  # noqa: F401
 | 
				
			||||||
| 
						 | 
					@ -11,3 +12,10 @@ from .evaluate import evaluate  # noqa: F401
 | 
				
			||||||
from .convert import convert  # noqa: F401
 | 
					from .convert import convert  # noqa: F401
 | 
				
			||||||
from .init_model import init_model  # noqa: F401
 | 
					from .init_model import init_model  # noqa: F401
 | 
				
			||||||
from .validate import validate  # noqa: F401
 | 
					from .validate import validate  # noqa: F401
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def link(*args, **kwargs):
 | 
				
			||||||
 | 
					    msg.warn(
 | 
				
			||||||
 | 
					        "As of spaCy v3.0, model symlinks are deprecated. You can load models "
 | 
				
			||||||
 | 
					        "using their full names or from a directory path."
 | 
				
			||||||
 | 
					    )
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -4,8 +4,6 @@ import subprocess
 | 
				
			||||||
import sys
 | 
					import sys
 | 
				
			||||||
from wasabi import msg
 | 
					from wasabi import msg
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from .link import link
 | 
					 | 
				
			||||||
from ..util import get_package_path
 | 
					 | 
				
			||||||
from .. import about
 | 
					from .. import about
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -15,9 +13,9 @@ def download(
 | 
				
			||||||
    *pip_args: ("Additional arguments to be passed to `pip install` on model install"),
 | 
					    *pip_args: ("Additional arguments to be passed to `pip install` on model install"),
 | 
				
			||||||
):
 | 
					):
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    Download compatible model from default download path using pip. Model
 | 
					    Download compatible model from default download path using pip. If --direct
 | 
				
			||||||
    can be shortcut, model name or, if --direct flag is set, full model name
 | 
					    flag is set, the command expects the full model name with version.
 | 
				
			||||||
    with version. For direct downloads, the compatibility check will be skipped.
 | 
					    For direct downloads, the compatibility check will be skipped.
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    if not require_package("spacy") and "--no-deps" not in pip_args:
 | 
					    if not require_package("spacy") and "--no-deps" not in pip_args:
 | 
				
			||||||
        msg.warn(
 | 
					        msg.warn(
 | 
				
			||||||
| 
						 | 
					@ -47,28 +45,6 @@ def download(
 | 
				
			||||||
            "Download and installation successful",
 | 
					            "Download and installation successful",
 | 
				
			||||||
            f"You can now load the model via spacy.load('{model_name}')",
 | 
					            f"You can now load the model via spacy.load('{model_name}')",
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
        # Only create symlink if the model is installed via a shortcut like 'en'.
 | 
					 | 
				
			||||||
        # There's no real advantage over an additional symlink for en_core_web_sm
 | 
					 | 
				
			||||||
        # and if anything, it's more error prone and causes more confusion.
 | 
					 | 
				
			||||||
        if model in shortcuts:
 | 
					 | 
				
			||||||
            try:
 | 
					 | 
				
			||||||
                # Get package path here because link uses
 | 
					 | 
				
			||||||
                # pip.get_installed_distributions() to check if model is a
 | 
					 | 
				
			||||||
                # package, which fails if model was just installed via
 | 
					 | 
				
			||||||
                # subprocess
 | 
					 | 
				
			||||||
                package_path = get_package_path(model_name)
 | 
					 | 
				
			||||||
                link(model_name, model, force=True, model_path=package_path)
 | 
					 | 
				
			||||||
            except:  # noqa: E722
 | 
					 | 
				
			||||||
                # Dirty, but since spacy.download and the auto-linking is
 | 
					 | 
				
			||||||
                # mostly a convenience wrapper, it's best to show a success
 | 
					 | 
				
			||||||
                # message and loading instructions, even if linking fails.
 | 
					 | 
				
			||||||
                msg.warn(
 | 
					 | 
				
			||||||
                    "Download successful but linking failed",
 | 
					 | 
				
			||||||
                    f"Creating a shortcut link for '{model}' didn't work (maybe you "
 | 
					 | 
				
			||||||
                    f"don't have admin permissions?), but you can still load "
 | 
					 | 
				
			||||||
                    f"the model via its full package name: "
 | 
					 | 
				
			||||||
                    f"nlp = spacy.load('{model_name}')",
 | 
					 | 
				
			||||||
                )
 | 
					 | 
				
			||||||
        # If a model is downloaded and then loaded within the same process, our
 | 
					        # If a model is downloaded and then loaded within the same process, our
 | 
				
			||||||
        # is_package check currently fails, because pkg_resources.working_set
 | 
					        # is_package check currently fails, because pkg_resources.working_set
 | 
				
			||||||
        # is not refreshed automatically (see #3923). We're trying to work
 | 
					        # is not refreshed automatically (see #3923). We're trying to work
 | 
				
			||||||
| 
						 | 
					@ -114,8 +90,7 @@ def get_version(model, comp):
 | 
				
			||||||
    model = model.rsplit(".dev", 1)[0]
 | 
					    model = model.rsplit(".dev", 1)[0]
 | 
				
			||||||
    if model not in comp:
 | 
					    if model not in comp:
 | 
				
			||||||
        msg.fail(
 | 
					        msg.fail(
 | 
				
			||||||
            f"No compatible model found for '{model}' "
 | 
					            f"No compatible model found for '{model}' (spaCy v{about.__version__})",
 | 
				
			||||||
            f"(spaCy v{about.__version__}).",
 | 
					 | 
				
			||||||
            exits=1,
 | 
					            exits=1,
 | 
				
			||||||
        )
 | 
					        )
 | 
				
			||||||
    return comp[model][0]
 | 
					    return comp[model][0]
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -3,25 +3,26 @@ from pathlib import Path
 | 
				
			||||||
from wasabi import msg
 | 
					from wasabi import msg
 | 
				
			||||||
import srsly
 | 
					import srsly
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					from .validate import get_model_pkgs
 | 
				
			||||||
from .. import util
 | 
					from .. import util
 | 
				
			||||||
from .. import about
 | 
					from .. import about
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def info(
 | 
					def info(
 | 
				
			||||||
    model: ("Optional shortcut link of model", "positional", None, str) = None,
 | 
					    model: ("Optional model name", "positional", None, str) = None,
 | 
				
			||||||
    markdown: ("Generate Markdown for GitHub issues", "flag", "md", str) = False,
 | 
					    markdown: ("Generate Markdown for GitHub issues", "flag", "md", str) = False,
 | 
				
			||||||
    silent: ("Don't print anything (just return)", "flag", "s") = False,
 | 
					    silent: ("Don't print anything (just return)", "flag", "s") = False,
 | 
				
			||||||
):
 | 
					):
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    Print info about spaCy installation. If a model shortcut link is
 | 
					    Print info about spaCy installation. If a model is specified as an argument,
 | 
				
			||||||
    specified as an argument, print model information. Flag --markdown
 | 
					    print model information. Flag --markdown prints details in Markdown for easy
 | 
				
			||||||
    prints details in Markdown for easy copy-pasting to GitHub issues.
 | 
					    copy-pasting to GitHub issues.
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    if model:
 | 
					    if model:
 | 
				
			||||||
        if util.is_package(model):
 | 
					        if util.is_package(model):
 | 
				
			||||||
            model_path = util.get_package_path(model)
 | 
					            model_path = util.get_package_path(model)
 | 
				
			||||||
        else:
 | 
					        else:
 | 
				
			||||||
            model_path = util.get_data_path() / model
 | 
					            model_path = model
 | 
				
			||||||
        meta_path = model_path / "meta.json"
 | 
					        meta_path = model_path / "meta.json"
 | 
				
			||||||
        if not meta_path.is_file():
 | 
					        if not meta_path.is_file():
 | 
				
			||||||
            msg.fail("Can't find model meta.json", meta_path, exits=1)
 | 
					            msg.fail("Can't find model meta.json", meta_path, exits=1)
 | 
				
			||||||
| 
						 | 
					@ -41,12 +42,13 @@ def info(
 | 
				
			||||||
            else:
 | 
					            else:
 | 
				
			||||||
                msg.table(model_meta, title=title)
 | 
					                msg.table(model_meta, title=title)
 | 
				
			||||||
        return meta
 | 
					        return meta
 | 
				
			||||||
 | 
					    all_models, _ = get_model_pkgs()
 | 
				
			||||||
    data = {
 | 
					    data = {
 | 
				
			||||||
        "spaCy version": about.__version__,
 | 
					        "spaCy version": about.__version__,
 | 
				
			||||||
        "Location": str(Path(__file__).parent.parent),
 | 
					        "Location": str(Path(__file__).parent.parent),
 | 
				
			||||||
        "Platform": platform.platform(),
 | 
					        "Platform": platform.platform(),
 | 
				
			||||||
        "Python version": platform.python_version(),
 | 
					        "Python version": platform.python_version(),
 | 
				
			||||||
        "Models": list_models(),
 | 
					        "Models": ", ".join(model["name"] for model in all_models.values()),
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    if not silent:
 | 
					    if not silent:
 | 
				
			||||||
        title = "Info about spaCy"
 | 
					        title = "Info about spaCy"
 | 
				
			||||||
| 
						 | 
					@ -57,19 +59,6 @@ def info(
 | 
				
			||||||
    return data
 | 
					    return data
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def list_models():
 | 
					 | 
				
			||||||
    def exclude_dir(dir_name):
 | 
					 | 
				
			||||||
        # exclude common cache directories and hidden directories
 | 
					 | 
				
			||||||
        exclude = ("cache", "pycache", "__pycache__")
 | 
					 | 
				
			||||||
        return dir_name in exclude or dir_name.startswith(".")
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    data_path = util.get_data_path()
 | 
					 | 
				
			||||||
    if data_path:
 | 
					 | 
				
			||||||
        models = [f.parts[-1] for f in data_path.iterdir() if f.is_dir()]
 | 
					 | 
				
			||||||
        return ", ".join([m for m in models if not exclude_dir(m)])
 | 
					 | 
				
			||||||
    return "-"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def print_markdown(data, title=None):
 | 
					def print_markdown(data, title=None):
 | 
				
			||||||
    """Print data in GitHub-flavoured Markdown format for issues etc.
 | 
					    """Print data in GitHub-flavoured Markdown format for issues etc.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,73 +0,0 @@
 | 
				
			||||||
from pathlib import Path
 | 
					 | 
				
			||||||
from wasabi import msg
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
from ..compat import symlink_to
 | 
					 | 
				
			||||||
from .. import util
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def link(
 | 
					 | 
				
			||||||
    origin: ("package name or local path to model", "positional", None, str),
 | 
					 | 
				
			||||||
    link_name: ("name of shortcut link to create", "positional", None, str),
 | 
					 | 
				
			||||||
    force: ("force overwriting of existing link", "flag", "f", bool) = False,
 | 
					 | 
				
			||||||
    model_path=None,
 | 
					 | 
				
			||||||
):
 | 
					 | 
				
			||||||
    """
 | 
					 | 
				
			||||||
    Create a symlink for models within the spacy/data directory. Accepts
 | 
					 | 
				
			||||||
    either the name of a pip package, or the local path to the model data
 | 
					 | 
				
			||||||
    directory. Linking models allows loading them via spacy.load(link_name).
 | 
					 | 
				
			||||||
    """
 | 
					 | 
				
			||||||
    if util.is_package(origin):
 | 
					 | 
				
			||||||
        model_path = util.get_package_path(origin)
 | 
					 | 
				
			||||||
    else:
 | 
					 | 
				
			||||||
        model_path = Path(origin) if model_path is None else Path(model_path)
 | 
					 | 
				
			||||||
    if not model_path.exists():
 | 
					 | 
				
			||||||
        msg.fail(
 | 
					 | 
				
			||||||
            "Can't locate model data",
 | 
					 | 
				
			||||||
            f"The data should be located in {model_path}",
 | 
					 | 
				
			||||||
            exits=1,
 | 
					 | 
				
			||||||
        )
 | 
					 | 
				
			||||||
    data_path = util.get_data_path()
 | 
					 | 
				
			||||||
    if not data_path or not data_path.exists():
 | 
					 | 
				
			||||||
        spacy_loc = Path(__file__).parent.parent
 | 
					 | 
				
			||||||
        msg.fail(
 | 
					 | 
				
			||||||
            f"Can't find the spaCy data path to create model symlink",
 | 
					 | 
				
			||||||
            f"Make sure a directory `/data` exists within your spaCy "
 | 
					 | 
				
			||||||
            f"installation and try again. The data directory should be located "
 | 
					 | 
				
			||||||
            f"here: {spacy_loc}",
 | 
					 | 
				
			||||||
            exits=1,
 | 
					 | 
				
			||||||
        )
 | 
					 | 
				
			||||||
    link_path = util.get_data_path() / link_name
 | 
					 | 
				
			||||||
    if link_path.is_symlink() and not force:
 | 
					 | 
				
			||||||
        msg.fail(
 | 
					 | 
				
			||||||
            f"Link '{link_name}' already exists",
 | 
					 | 
				
			||||||
            "To overwrite an existing link, use the --force flag",
 | 
					 | 
				
			||||||
            exits=1,
 | 
					 | 
				
			||||||
        )
 | 
					 | 
				
			||||||
    elif link_path.is_symlink():  # does a symlink exist?
 | 
					 | 
				
			||||||
        # NB: It's important to check for is_symlink here and not for exists,
 | 
					 | 
				
			||||||
        # because invalid/outdated symlinks would return False otherwise.
 | 
					 | 
				
			||||||
        link_path.unlink()
 | 
					 | 
				
			||||||
    elif link_path.exists():  # does it exist otherwise?
 | 
					 | 
				
			||||||
        # NB: Check this last because valid symlinks also "exist".
 | 
					 | 
				
			||||||
        msg.fail(
 | 
					 | 
				
			||||||
            f"Can't overwrite symlink '{link_name}'",
 | 
					 | 
				
			||||||
            "This can happen if your data directory contains a directory or "
 | 
					 | 
				
			||||||
            "file of the same name.",
 | 
					 | 
				
			||||||
            exits=1,
 | 
					 | 
				
			||||||
        )
 | 
					 | 
				
			||||||
    details = f"{model_path} --> {link_path}"
 | 
					 | 
				
			||||||
    try:
 | 
					 | 
				
			||||||
        symlink_to(link_path, model_path)
 | 
					 | 
				
			||||||
    except:  # noqa: E722
 | 
					 | 
				
			||||||
        # This is quite dirty, but just making sure other errors are caught.
 | 
					 | 
				
			||||||
        msg.fail(
 | 
					 | 
				
			||||||
            f"Couldn't link model to '{link_name}'",
 | 
					 | 
				
			||||||
            "Creating a symlink in spacy/data failed. Make sure you have the "
 | 
					 | 
				
			||||||
            "required permissions and try re-running the command as admin, or "
 | 
					 | 
				
			||||||
            "use a virtualenv. You can still import the model as a module and "
 | 
					 | 
				
			||||||
            "call its load() method, or create the symlink manually.",
 | 
					 | 
				
			||||||
        )
 | 
					 | 
				
			||||||
        msg.text(details)
 | 
					 | 
				
			||||||
        raise
 | 
					 | 
				
			||||||
    msg.good("Linking successful", details)
 | 
					 | 
				
			||||||
    msg.text(f"You can now load the model via spacy.load('{link_name}')")
 | 
					 | 
				
			||||||
| 
						 | 
					@ -1,10 +1,8 @@
 | 
				
			||||||
from pathlib import Path
 | 
					from pathlib import Path
 | 
				
			||||||
import sys
 | 
					import sys
 | 
				
			||||||
import requests
 | 
					import requests
 | 
				
			||||||
import srsly
 | 
					 | 
				
			||||||
from wasabi import msg
 | 
					from wasabi import msg
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from ..util import get_data_path
 | 
					 | 
				
			||||||
from .. import about
 | 
					from .. import about
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -13,6 +11,50 @@ def validate():
 | 
				
			||||||
    Validate that the currently installed version of spaCy is compatible
 | 
					    Validate that the currently installed version of spaCy is compatible
 | 
				
			||||||
    with the installed models. Should be run after `pip install -U spacy`.
 | 
					    with the installed models. Should be run after `pip install -U spacy`.
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
 | 
					    model_pkgs, compat = get_model_pkgs()
 | 
				
			||||||
 | 
					    spacy_version = about.__version__.rsplit(".dev", 1)[0]
 | 
				
			||||||
 | 
					    current_compat = compat.get(spacy_version, {})
 | 
				
			||||||
 | 
					    if not current_compat:
 | 
				
			||||||
 | 
					        msg.warn(f"No compatible models found for v{spacy_version} of spaCy")
 | 
				
			||||||
 | 
					    incompat_models = {d["name"] for _, d in model_pkgs.items() if not d["compat"]}
 | 
				
			||||||
 | 
					    na_models = [m for m in incompat_models if m not in current_compat]
 | 
				
			||||||
 | 
					    update_models = [m for m in incompat_models if m in current_compat]
 | 
				
			||||||
 | 
					    spacy_dir = Path(__file__).parent.parent
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    msg.divider(f"Installed models (spaCy v{about.__version__})")
 | 
				
			||||||
 | 
					    msg.info(f"spaCy installation: {spacy_dir}")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if model_pkgs:
 | 
				
			||||||
 | 
					        header = ("NAME", "VERSION", "")
 | 
				
			||||||
 | 
					        rows = []
 | 
				
			||||||
 | 
					        for name, data in model_pkgs.items():
 | 
				
			||||||
 | 
					            if data["compat"]:
 | 
				
			||||||
 | 
					                comp = msg.text("", color="green", icon="good", no_print=True)
 | 
				
			||||||
 | 
					                version = msg.text(data["version"], color="green", no_print=True)
 | 
				
			||||||
 | 
					            else:
 | 
				
			||||||
 | 
					                version = msg.text(data["version"], color="red", no_print=True)
 | 
				
			||||||
 | 
					                comp = f"--> {compat.get(data['name'], ['n/a'])[0]}"
 | 
				
			||||||
 | 
					            rows.append((data["name"], version, comp))
 | 
				
			||||||
 | 
					        msg.table(rows, header=header)
 | 
				
			||||||
 | 
					    else:
 | 
				
			||||||
 | 
					        msg.text("No models found in your current environment.", exits=0)
 | 
				
			||||||
 | 
					    if update_models:
 | 
				
			||||||
 | 
					        msg.divider("Install updates")
 | 
				
			||||||
 | 
					        msg.text("Use the following commands to update the model packages:")
 | 
				
			||||||
 | 
					        cmd = "python -m spacy download {}"
 | 
				
			||||||
 | 
					        print("\n".join([cmd.format(pkg) for pkg in update_models]) + "\n")
 | 
				
			||||||
 | 
					    if na_models:
 | 
				
			||||||
 | 
					        msg.warn(
 | 
				
			||||||
 | 
					            f"The following models are not available for spaCy v{about.__version__}:",
 | 
				
			||||||
 | 
					            ", ".join(na_models),
 | 
				
			||||||
 | 
					        )
 | 
				
			||||||
 | 
					    if incompat_models:
 | 
				
			||||||
 | 
					        sys.exit(1)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def get_model_pkgs():
 | 
				
			||||||
 | 
					    import pkg_resources
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    with msg.loading("Loading compatibility table..."):
 | 
					    with msg.loading("Loading compatibility table..."):
 | 
				
			||||||
        r = requests.get(about.__compatibility__)
 | 
					        r = requests.get(about.__compatibility__)
 | 
				
			||||||
        if r.status_code != 200:
 | 
					        if r.status_code != 200:
 | 
				
			||||||
| 
						 | 
					@ -23,88 +65,11 @@ def validate():
 | 
				
			||||||
            )
 | 
					            )
 | 
				
			||||||
    msg.good("Loaded compatibility table")
 | 
					    msg.good("Loaded compatibility table")
 | 
				
			||||||
    compat = r.json()["spacy"]
 | 
					    compat = r.json()["spacy"]
 | 
				
			||||||
    version = about.__version__
 | 
					 | 
				
			||||||
    version = version.rsplit(".dev", 1)[0]
 | 
					 | 
				
			||||||
    current_compat = compat.get(version)
 | 
					 | 
				
			||||||
    if not current_compat:
 | 
					 | 
				
			||||||
        msg.fail(
 | 
					 | 
				
			||||||
            f"Can't find spaCy v{version} in compatibility table",
 | 
					 | 
				
			||||||
            about.__compatibility__,
 | 
					 | 
				
			||||||
            exits=1,
 | 
					 | 
				
			||||||
        )
 | 
					 | 
				
			||||||
    all_models = set()
 | 
					    all_models = set()
 | 
				
			||||||
    for spacy_v, models in dict(compat).items():
 | 
					    for spacy_v, models in dict(compat).items():
 | 
				
			||||||
        all_models.update(models.keys())
 | 
					        all_models.update(models.keys())
 | 
				
			||||||
        for model, model_vs in models.items():
 | 
					        for model, model_vs in models.items():
 | 
				
			||||||
            compat[spacy_v][model] = [reformat_version(v) for v in model_vs]
 | 
					            compat[spacy_v][model] = [reformat_version(v) for v in model_vs]
 | 
				
			||||||
    model_links = get_model_links(current_compat)
 | 
					 | 
				
			||||||
    model_pkgs = get_model_pkgs(current_compat, all_models)
 | 
					 | 
				
			||||||
    incompat_links = {l for l, d in model_links.items() if not d["compat"]}
 | 
					 | 
				
			||||||
    incompat_models = {d["name"] for _, d in model_pkgs.items() if not d["compat"]}
 | 
					 | 
				
			||||||
    incompat_models.update(
 | 
					 | 
				
			||||||
        [d["name"] for _, d in model_links.items() if not d["compat"]]
 | 
					 | 
				
			||||||
    )
 | 
					 | 
				
			||||||
    na_models = [m for m in incompat_models if m not in current_compat]
 | 
					 | 
				
			||||||
    update_models = [m for m in incompat_models if m in current_compat]
 | 
					 | 
				
			||||||
    spacy_dir = Path(__file__).parent.parent
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    msg.divider(f"Installed models (spaCy v{about.__version__})")
 | 
					 | 
				
			||||||
    msg.info(f"spaCy installation: {spacy_dir}")
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if model_links or model_pkgs:
 | 
					 | 
				
			||||||
        header = ("TYPE", "NAME", "MODEL", "VERSION", "")
 | 
					 | 
				
			||||||
        rows = []
 | 
					 | 
				
			||||||
        for name, data in model_pkgs.items():
 | 
					 | 
				
			||||||
            rows.append(get_model_row(current_compat, name, data, msg))
 | 
					 | 
				
			||||||
        for name, data in model_links.items():
 | 
					 | 
				
			||||||
            rows.append(get_model_row(current_compat, name, data, msg, "link"))
 | 
					 | 
				
			||||||
        msg.table(rows, header=header)
 | 
					 | 
				
			||||||
    else:
 | 
					 | 
				
			||||||
        msg.text("No models found in your current environment.", exits=0)
 | 
					 | 
				
			||||||
    if update_models:
 | 
					 | 
				
			||||||
        msg.divider("Install updates")
 | 
					 | 
				
			||||||
        msg.text("Use the following commands to update the model packages:")
 | 
					 | 
				
			||||||
        cmd = "python -m spacy download {}"
 | 
					 | 
				
			||||||
        print("\n".join([cmd.format(pkg) for pkg in update_models]) + "\n")
 | 
					 | 
				
			||||||
    if na_models:
 | 
					 | 
				
			||||||
        msg.text(
 | 
					 | 
				
			||||||
            f"The following models are not available for spaCy "
 | 
					 | 
				
			||||||
            f"v{about.__version__}: {', '.join(na_models)}"
 | 
					 | 
				
			||||||
        )
 | 
					 | 
				
			||||||
    if incompat_links:
 | 
					 | 
				
			||||||
        msg.text(
 | 
					 | 
				
			||||||
            f"You may also want to overwrite the incompatible links using the "
 | 
					 | 
				
			||||||
            f"`python -m spacy link` command with `--force`, or remove them "
 | 
					 | 
				
			||||||
            f"from the data directory. "
 | 
					 | 
				
			||||||
            f"Data path: {get_data_path()}"
 | 
					 | 
				
			||||||
        )
 | 
					 | 
				
			||||||
    if incompat_models or incompat_links:
 | 
					 | 
				
			||||||
        sys.exit(1)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def get_model_links(compat):
 | 
					 | 
				
			||||||
    links = {}
 | 
					 | 
				
			||||||
    data_path = get_data_path()
 | 
					 | 
				
			||||||
    if data_path:
 | 
					 | 
				
			||||||
        models = [p for p in data_path.iterdir() if is_model_path(p)]
 | 
					 | 
				
			||||||
        for model in models:
 | 
					 | 
				
			||||||
            meta_path = Path(model) / "meta.json"
 | 
					 | 
				
			||||||
            if not meta_path.exists():
 | 
					 | 
				
			||||||
                continue
 | 
					 | 
				
			||||||
            meta = srsly.read_json(meta_path)
 | 
					 | 
				
			||||||
            link = model.parts[-1]
 | 
					 | 
				
			||||||
            name = meta["lang"] + "_" + meta["name"]
 | 
					 | 
				
			||||||
            links[link] = {
 | 
					 | 
				
			||||||
                "name": name,
 | 
					 | 
				
			||||||
                "version": meta["version"],
 | 
					 | 
				
			||||||
                "compat": is_compat(compat, name, meta["version"]),
 | 
					 | 
				
			||||||
            }
 | 
					 | 
				
			||||||
    return links
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def get_model_pkgs(compat, all_models):
 | 
					 | 
				
			||||||
    import pkg_resources
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    pkgs = {}
 | 
					    pkgs = {}
 | 
				
			||||||
    for pkg_name, pkg_data in pkg_resources.working_set.by_key.items():
 | 
					    for pkg_name, pkg_data in pkg_resources.working_set.by_key.items():
 | 
				
			||||||
        package = pkg_name.replace("-", "_")
 | 
					        package = pkg_name.replace("-", "_")
 | 
				
			||||||
| 
						 | 
					@ -113,29 +78,9 @@ def get_model_pkgs(compat, all_models):
 | 
				
			||||||
            pkgs[pkg_name] = {
 | 
					            pkgs[pkg_name] = {
 | 
				
			||||||
                "name": package,
 | 
					                "name": package,
 | 
				
			||||||
                "version": version,
 | 
					                "version": version,
 | 
				
			||||||
                "compat": is_compat(compat, package, version),
 | 
					                "compat": package in compat and version in compat[package],
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
    return pkgs
 | 
					    return pkgs, compat
 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def get_model_row(compat, name, data, msg, model_type="package"):
 | 
					 | 
				
			||||||
    if data["compat"]:
 | 
					 | 
				
			||||||
        comp = msg.text("", color="green", icon="good", no_print=True)
 | 
					 | 
				
			||||||
        version = msg.text(data["version"], color="green", no_print=True)
 | 
					 | 
				
			||||||
    else:
 | 
					 | 
				
			||||||
        version = msg.text(data["version"], color="red", no_print=True)
 | 
					 | 
				
			||||||
        comp = f"--> {compat.get(data['name'], ['n/a'])[0]}"
 | 
					 | 
				
			||||||
    return (model_type, name, data["name"], version, comp)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def is_model_path(model_path):
 | 
					 | 
				
			||||||
    exclude = ["cache", "pycache", "__pycache__"]
 | 
					 | 
				
			||||||
    name = model_path.parts[-1]
 | 
					 | 
				
			||||||
    return model_path.is_dir() and name not in exclude and not name.startswith(".")
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def is_compat(compat, name, version):
 | 
					 | 
				
			||||||
    return name in compat and version in compat[name]
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def reformat_version(version):
 | 
					def reformat_version(version):
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -5,7 +5,6 @@ e.g. `unicode_`.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
DOCS: https://spacy.io/api/top-level#compat
 | 
					DOCS: https://spacy.io/api/top-level#compat
 | 
				
			||||||
"""
 | 
					"""
 | 
				
			||||||
import os
 | 
					 | 
				
			||||||
import sys
 | 
					import sys
 | 
				
			||||||
 | 
					
 | 
				
			||||||
from thinc.util import copy_array
 | 
					from thinc.util import copy_array
 | 
				
			||||||
| 
						 | 
					@ -43,33 +42,6 @@ is_linux = sys.platform.startswith("linux")
 | 
				
			||||||
is_osx = sys.platform == "darwin"
 | 
					is_osx = sys.platform == "darwin"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def symlink_to(orig, dest):
 | 
					 | 
				
			||||||
    """Create a symlink. Used for model shortcut links.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    orig (unicode / Path): The origin path.
 | 
					 | 
				
			||||||
    dest (unicode / Path): The destination path of the symlink.
 | 
					 | 
				
			||||||
    """
 | 
					 | 
				
			||||||
    if is_windows:
 | 
					 | 
				
			||||||
        import subprocess
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        subprocess.check_call(["mklink", "/d", str(orig), str(dest)], shell=True)
 | 
					 | 
				
			||||||
    else:
 | 
					 | 
				
			||||||
        orig.symlink_to(dest)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def symlink_remove(link):
 | 
					 | 
				
			||||||
    """Remove a symlink. Used for model shortcut links.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    link (unicode / Path): The path to the symlink.
 | 
					 | 
				
			||||||
    """
 | 
					 | 
				
			||||||
    # https://stackoverflow.com/q/26554135/6400719
 | 
					 | 
				
			||||||
    if os.path.isdir(str(link)) and is_windows:
 | 
					 | 
				
			||||||
        # this should only be on Py2.7 and windows
 | 
					 | 
				
			||||||
        os.rmdir(str(link))
 | 
					 | 
				
			||||||
    else:
 | 
					 | 
				
			||||||
        os.unlink(str(link))
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def is_config(windows=None, linux=None, osx=None, **kwargs):
 | 
					def is_config(windows=None, linux=None, osx=None, **kwargs):
 | 
				
			||||||
    """Check if a specific configuration of Python version and operating system
 | 
					    """Check if a specific configuration of Python version and operating system
 | 
				
			||||||
    matches the user's setup. Mostly used to display targeted error messages.
 | 
					    matches the user's setup. Mostly used to display targeted error messages.
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -224,13 +224,8 @@ class Errors(object):
 | 
				
			||||||
    E047 = ("Can't assign a value to unregistered extension attribute "
 | 
					    E047 = ("Can't assign a value to unregistered extension attribute "
 | 
				
			||||||
            "'{name}'. Did you forget to call the `set_extension` method?")
 | 
					            "'{name}'. Did you forget to call the `set_extension` method?")
 | 
				
			||||||
    E048 = ("Can't import language {lang} from spacy.lang: {err}")
 | 
					    E048 = ("Can't import language {lang} from spacy.lang: {err}")
 | 
				
			||||||
    E049 = ("Can't find spaCy data directory: '{path}'. Check your "
 | 
					    E050 = ("Can't find model '{name}'. It doesn't seem to be a Python "
 | 
				
			||||||
            "installation and permissions, or use spacy.util.set_data_path "
 | 
					            "package or a valid path to a data directory.")
 | 
				
			||||||
            "to customise the location if necessary.")
 | 
					 | 
				
			||||||
    E050 = ("Can't find model '{name}'. It doesn't seem to be a shortcut "
 | 
					 | 
				
			||||||
            "link, a Python package or a valid path to a data directory.")
 | 
					 | 
				
			||||||
    E051 = ("Cant' load '{name}'. If you're using a shortcut link, make sure "
 | 
					 | 
				
			||||||
            "it points to a valid package (not just a data directory).")
 | 
					 | 
				
			||||||
    E052 = ("Can't find model directory: {path}")
 | 
					    E052 = ("Can't find model directory: {path}")
 | 
				
			||||||
    E053 = ("Could not read meta.json from {path}")
 | 
					    E053 = ("Could not read meta.json from {path}")
 | 
				
			||||||
    E054 = ("No valid '{setting}' setting found in model meta.json.")
 | 
					    E054 = ("No valid '{setting}' setting found in model meta.json.")
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -4,36 +4,8 @@ import ctypes
 | 
				
			||||||
from pathlib import Path
 | 
					from pathlib import Path
 | 
				
			||||||
from spacy import util
 | 
					from spacy import util
 | 
				
			||||||
from spacy import prefer_gpu, require_gpu
 | 
					from spacy import prefer_gpu, require_gpu
 | 
				
			||||||
from spacy.compat import symlink_to, symlink_remove, is_windows
 | 
					 | 
				
			||||||
from spacy.ml._layers import PrecomputableAffine
 | 
					from spacy.ml._layers import PrecomputableAffine
 | 
				
			||||||
from spacy.ml._layers import _backprop_precomputable_affine_padding
 | 
					from spacy.ml._layers import _backprop_precomputable_affine_padding
 | 
				
			||||||
from subprocess import CalledProcessError
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
@pytest.fixture
 | 
					 | 
				
			||||||
def symlink_target():
 | 
					 | 
				
			||||||
    return Path("./foo-target")
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
@pytest.fixture
 | 
					 | 
				
			||||||
def symlink():
 | 
					 | 
				
			||||||
    return Path("./foo-symlink")
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
@pytest.fixture(scope="function")
 | 
					 | 
				
			||||||
def symlink_setup_target(request, symlink_target, symlink):
 | 
					 | 
				
			||||||
    if not symlink_target.exists():
 | 
					 | 
				
			||||||
        os.mkdir(str(symlink_target))
 | 
					 | 
				
			||||||
    # yield -- need to cleanup even if assertion fails
 | 
					 | 
				
			||||||
    # https://github.com/pytest-dev/pytest/issues/2508#issuecomment-309934240
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    def cleanup():
 | 
					 | 
				
			||||||
        # Remove symlink only if it was created
 | 
					 | 
				
			||||||
        if symlink.exists():
 | 
					 | 
				
			||||||
            symlink_remove(symlink)
 | 
					 | 
				
			||||||
        os.rmdir(str(symlink_target))
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    request.addfinalizer(cleanup)
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@pytest.fixture
 | 
					@pytest.fixture
 | 
				
			||||||
| 
						 | 
					@ -109,25 +81,6 @@ def test_require_gpu():
 | 
				
			||||||
            require_gpu()
 | 
					            require_gpu()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def test_create_symlink_windows(
 | 
					 | 
				
			||||||
    symlink_setup_target, symlink_target, symlink, is_admin
 | 
					 | 
				
			||||||
):
 | 
					 | 
				
			||||||
    """Test the creation of symlinks on windows. If run as admin or not on windows it should succeed, otherwise a CalledProcessError should be raised."""
 | 
					 | 
				
			||||||
    assert symlink_target.exists()
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if is_admin or not is_windows:
 | 
					 | 
				
			||||||
        try:
 | 
					 | 
				
			||||||
            symlink_to(symlink, symlink_target)
 | 
					 | 
				
			||||||
            assert symlink.exists()
 | 
					 | 
				
			||||||
        except CalledProcessError as e:
 | 
					 | 
				
			||||||
            pytest.fail(e)
 | 
					 | 
				
			||||||
    else:
 | 
					 | 
				
			||||||
        with pytest.raises(CalledProcessError):
 | 
					 | 
				
			||||||
            symlink_to(symlink, symlink_target)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
        assert not symlink.exists()
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def test_ascii_filenames():
 | 
					def test_ascii_filenames():
 | 
				
			||||||
    """Test that all filenames in the project are ASCII.
 | 
					    """Test that all filenames in the project are ASCII.
 | 
				
			||||||
    See: https://twitter.com/_inesmontani/status/1177941471632211968
 | 
					    See: https://twitter.com/_inesmontani/status/1177941471632211968
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -25,7 +25,6 @@ from .symbols import ORTH
 | 
				
			||||||
from .compat import cupy, CudaStream
 | 
					from .compat import cupy, CudaStream
 | 
				
			||||||
from .errors import Errors, Warnings, deprecation_warning, user_warning
 | 
					from .errors import Errors, Warnings, deprecation_warning, user_warning
 | 
				
			||||||
 | 
					
 | 
				
			||||||
_data_path = Path(__file__).parent / "data"
 | 
					 | 
				
			||||||
_PRINT_ENV = False
 | 
					_PRINT_ENV = False
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -80,27 +79,6 @@ def set_lang_class(name, cls):
 | 
				
			||||||
    registry.languages.register(name, func=cls)
 | 
					    registry.languages.register(name, func=cls)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def get_data_path(require_exists=True):
 | 
					 | 
				
			||||||
    """Get path to spaCy data directory.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    require_exists (bool): Only return path if it exists, otherwise None.
 | 
					 | 
				
			||||||
    RETURNS (Path or None): Data path or None.
 | 
					 | 
				
			||||||
    """
 | 
					 | 
				
			||||||
    if not require_exists:
 | 
					 | 
				
			||||||
        return _data_path
 | 
					 | 
				
			||||||
    else:
 | 
					 | 
				
			||||||
        return _data_path if _data_path.exists() else None
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def set_data_path(path):
 | 
					 | 
				
			||||||
    """Set path to spaCy data directory.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    path (unicode or Path): Path to new data directory.
 | 
					 | 
				
			||||||
    """
 | 
					 | 
				
			||||||
    global _data_path
 | 
					 | 
				
			||||||
    _data_path = ensure_path(path)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def make_layer(arch_config):
 | 
					def make_layer(arch_config):
 | 
				
			||||||
    arch_func = registry.architectures.get(arch_config["arch"])
 | 
					    arch_func = registry.architectures.get(arch_config["arch"])
 | 
				
			||||||
    return arch_func(arch_config["config"])
 | 
					    return arch_func(arch_config["config"])
 | 
				
			||||||
| 
						 | 
					@ -141,18 +119,13 @@ def get_module_path(module):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def load_model(name, **overrides):
 | 
					def load_model(name, **overrides):
 | 
				
			||||||
    """Load a model from a shortcut link, package or data path.
 | 
					    """Load a model from a package or data path.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    name (unicode): Package name, shortcut link or model path.
 | 
					    name (unicode): Package name or model path.
 | 
				
			||||||
    **overrides: Specific overrides, like pipeline components to disable.
 | 
					    **overrides: Specific overrides, like pipeline components to disable.
 | 
				
			||||||
    RETURNS (Language): `Language` class with the loaded model.
 | 
					    RETURNS (Language): `Language` class with the loaded model.
 | 
				
			||||||
    """
 | 
					    """
 | 
				
			||||||
    data_path = get_data_path()
 | 
					    if isinstance(name, str):  # name or string path
 | 
				
			||||||
    if not data_path or not data_path.exists():
 | 
					 | 
				
			||||||
        raise IOError(Errors.E049.format(path=data_path))
 | 
					 | 
				
			||||||
    if isinstance(name, str):  # in data dir / shortcut
 | 
					 | 
				
			||||||
        if name in set([d.name for d in data_path.iterdir()]):
 | 
					 | 
				
			||||||
            return load_model_from_link(name, **overrides)
 | 
					 | 
				
			||||||
        if is_package(name):  # installed as package
 | 
					        if is_package(name):  # installed as package
 | 
				
			||||||
            return load_model_from_package(name, **overrides)
 | 
					            return load_model_from_package(name, **overrides)
 | 
				
			||||||
        if Path(name).exists():  # path to model data directory
 | 
					        if Path(name).exists():  # path to model data directory
 | 
				
			||||||
| 
						 | 
					@ -162,16 +135,6 @@ def load_model(name, **overrides):
 | 
				
			||||||
    raise IOError(Errors.E050.format(name=name))
 | 
					    raise IOError(Errors.E050.format(name=name))
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def load_model_from_link(name, **overrides):
 | 
					 | 
				
			||||||
    """Load a model from a shortcut link, or directory in spaCy data path."""
 | 
					 | 
				
			||||||
    path = get_data_path() / name / "__init__.py"
 | 
					 | 
				
			||||||
    try:
 | 
					 | 
				
			||||||
        cls = import_file(name, path)
 | 
					 | 
				
			||||||
    except AttributeError:
 | 
					 | 
				
			||||||
        raise IOError(Errors.E051.format(name=name))
 | 
					 | 
				
			||||||
    return cls.load(**overrides)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
def load_model_from_package(name, **overrides):
 | 
					def load_model_from_package(name, **overrides):
 | 
				
			||||||
    """Load a model from an installed package."""
 | 
					    """Load a model from an installed package."""
 | 
				
			||||||
    cls = importlib.import_module(name)
 | 
					    cls = importlib.import_module(name)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user