Reject relative URLs in 'spacy download'

This commit is contained in:
Matthew Honnibal 2024-02-08 15:35:50 +01:00
parent e1249d3722
commit bcf150dcb4

View File

@ -4,6 +4,7 @@ from typing import Optional, Sequence
import requests import requests
import typer import typer
from wasabi import msg from wasabi import msg
from urllib.parse import urljoin
from .. import about from .. import about
from ..errors import OLD_MODEL_SHORTCUTS from ..errors import OLD_MODEL_SHORTCUTS
@ -63,6 +64,13 @@ def download(
) )
pip_args = pip_args + ("--no-deps",) pip_args = pip_args + ("--no-deps",)
if direct: if direct:
# Reject model names with '/', in order to prevent shenanigans.
if "/" in model:
msg.fail(
title="Model download rejected",
text=f"Cannot download model '{model}'. Models are expected to be file names, not URLs or fragments",
exits=True,
)
components = model.split("-") components = model.split("-")
model_name = "".join(components[:-1]) model_name = "".join(components[:-1])
version = components[-1] version = components[-1]
@ -153,7 +161,16 @@ def get_latest_version(model: str) -> str:
def download_model(
    filename: str, user_pip_args: Optional[Sequence[str]] = None
) -> None:
    """Resolve `filename` against the download location and pip-install it.

    Args:
        filename: Location of the package, relative to
            `about.__download_url__`.
        user_pip_args: Extra arguments inserted after `pip install` and
            before the download URL. `None` means no extra arguments.

    Raises:
        ValueError: If the resolved URL does not live underneath the download
            location (for example because `filename` is an absolute URL or
            uses `..` segments to climb out of it).
    """
    # Construct the download URL carefully. We need to make sure we don't
    # allow relative paths or other shenanigans to trick us into downloading
    # from outside our own repo.
    base_url = about.__download_url__
    # urljoin replaces the last path segment of a base that lacks a trailing
    # slash, so normalize the base before joining.
    if not base_url.endswith("/"):
        base_url = base_url + "/"
    download_url = urljoin(base_url, filename)
    # Compare against the slash-terminated base. A bare prefix check would
    # also accept sibling paths such as "<base>-evil/...", which a filename
    # like "../<last-segment>-evil/pkg" resolves to.
    if not download_url.startswith(base_url):
        raise ValueError(f"Download from {filename} rejected. Was it a relative path?")
    pip_args = list(user_pip_args) if user_pip_args is not None else []
    cmd = [sys.executable, "-m", "pip", "install"] + pip_args + [download_url]
    run_command(cmd)