Add smart_open requirement, update deprecated options (#11864)

* Switch from deprecated `ignore_ext` to `compression`
* Add upload/download test for local files
This commit is contained in:
Adriane Boyd 2022-11-25 13:00:57 +01:00
parent ebcc7d830f
commit 5c975565dc
4 changed files with 20 additions and 2 deletions

View File

@ -11,6 +11,7 @@ srsly>=2.4.1,<3.0.0
catalogue>=2.0.4,<2.1.0
typer>=0.3.0,<0.4.0
pathy>=0.3.5
smart-open>=5.2.1,<7.0.0
# Third party dependencies
numpy>=1.15.0
requests>=2.13.0,<3.0.0

View File

@ -49,9 +49,10 @@ install_requires =
wasabi>=0.8.1,<1.1.0
srsly>=2.4.1,<3.0.0
catalogue>=2.0.4,<2.1.0
# Third-party dependencies
typer>=0.3.0,<0.4.0
pathy>=0.3.5
# Third-party dependencies
smart-open>=5.2.1,<7.0.0
tqdm>=4.38.0,<5.0.0
numpy>=1.15.0
requests>=2.13.0,<3.0.0

View File

@ -355,7 +355,7 @@ def download_file(src: Union[str, "Pathy"], dest: Path, *, force: bool = False)
if dest.exists() and not force:
return None
src = str(src)
with smart_open.open(src, mode="rb", ignore_ext=True) as input_file:
with smart_open.open(src, mode="rb", compression="disable") as input_file:
with dest.open(mode="wb") as output_file:
output_file.write(input_file.read())

View File

@ -10,6 +10,7 @@ from spacy.cli.init_config import init_config, RECOMMENDATIONS
from spacy.cli._util import validate_project_commands, parse_config_overrides
from spacy.cli._util import load_project_config, substitute_project_variables
from spacy.cli._util import string_to_list
from spacy.cli._util import upload_file, download_file
from thinc.api import ConfigValidationError, Config
import srsly
import os
@ -474,3 +475,18 @@ def test_string_to_list(value):
def test_string_to_list_intify(value):
assert string_to_list(value, intify=False) == ["1", "2", "3"]
assert string_to_list(value, intify=True) == [1, 2, 3]
def test_upload_download_local_file():
    """Round-trip a text file through upload_file and download_file.

    Both the "local" and the "remote" location are plain temporary
    directories, so the transfer helpers are exercised without any
    network access.
    """
    name = "f.txt"
    expected = "content"
    with make_tempdir() as d1, make_tempdir() as d2:
        source_path = d1 / name
        stored_path = d2 / name
        with source_path.open(mode="w") as handle:
            handle.write(expected)
        upload_file(source_path, stored_path)
        # Remove the original so that the download has to recreate it
        # instead of finding the destination already in place.
        source_path.unlink()
        download_file(stored_path, source_path)
        with source_path.open(mode="r") as handle:
            assert handle.read() == expected