diff --git a/requirements.txt b/requirements.txt index 8e90082b6..635024367 100644 --- a/requirements.txt +++ b/requirements.txt @@ -11,6 +11,7 @@ srsly>=2.4.3,<3.0.0 catalogue>=2.0.6,<2.1.0 typer>=0.3.0,<0.8.0 cloudpathlib>=0.7.0,<0.11.0 +smart-open>=5.2.1,<7.0.0 # Third party dependencies numpy>=1.15.0 requests>=2.13.0,<3.0.0 diff --git a/setup.cfg b/setup.cfg index 79e10b08d..077e21402 100644 --- a/setup.cfg +++ b/setup.cfg @@ -53,6 +53,7 @@ install_requires = # Third-party dependencies typer>=0.3.0,<0.8.0 cloudpathlib>=0.7.0,<0.11.0 + smart-open>=5.2.1,<7.0.0 tqdm>=4.38.0,<5.0.0 numpy>=1.15.0 requests>=2.13.0,<3.0.0 diff --git a/spacy/cli/_util.py b/spacy/cli/_util.py index de403842c..0869ea807 100644 --- a/spacy/cli/_util.py +++ b/spacy/cli/_util.py @@ -359,11 +359,12 @@ def download_file( force (bool): Whether to force download even if file exists. If False, the download will be skipped. """ - src = ensure_pathy(src) + import smart_open if dest.exists() and not force: return None - with src.open(mode="rb") as input_file: + src = str(src) + with smart_open.open(src, mode="rb", compression="disable") as input_file: with dest.open(mode="wb") as output_file: shutil.copyfileobj(input_file, output_file)