Add smart_open requirement, update deprecated options (#11864)

* Switch from deprecated `ignore_ext` to `compression`
* Add upload/download test for local files
This commit is contained in:
Adriane Boyd 2022-11-25 13:00:57 +01:00
parent a8b883fead
commit 21204f17c7
4 changed files with 21 additions and 3 deletions

View File

@ -12,6 +12,7 @@ srsly>=2.4.1,<3.0.0
catalogue>=2.0.6,<2.1.0
typer>=0.3.0,<0.5.0
pathy>=0.3.5
smart-open>=5.2.1,<7.0.0
# Third party dependencies
numpy>=1.15.0
requests>=2.13.0,<3.0.0

View File

@ -51,9 +51,10 @@ install_requires =
wasabi>=0.8.1,<1.1.0
srsly>=2.4.1,<3.0.0
catalogue>=2.0.6,<2.1.0
# Third-party dependencies
typer>=0.3.0,<0.5.0
pathy>=0.3.5
# Third-party dependencies
smart-open>=5.2.1,<7.0.0
tqdm>=4.38.0,<5.0.0
numpy>=1.15.0
requests>=2.13.0,<3.0.0

View File

@ -358,7 +358,7 @@ def download_file(src: Union[str, "Pathy"], dest: Path, *, force: bool = False)
if dest.exists() and not force:
return None
src = str(src)
with smart_open.open(src, mode="rb", ignore_ext=True) as input_file:
with smart_open.open(src, mode="rb", compression="disable") as input_file:
with dest.open(mode="wb") as output_file:
output_file.write(input_file.read())

View File

@ -12,6 +12,7 @@ from spacy.cli._util import is_subpath_of, load_project_config
from spacy.cli._util import parse_config_overrides, string_to_list
from spacy.cli._util import substitute_project_variables
from spacy.cli._util import validate_project_commands
from spacy.cli._util import upload_file, download_file
from spacy.cli.debug_data import _compile_gold, _get_labels_from_model
from spacy.cli.debug_data import _get_labels_from_spancat
from spacy.cli.download import get_compatibility, get_version
@ -719,4 +720,19 @@ def test_debug_data_compile_gold():
ref = Doc(nlp.vocab, words=["Token", ".", "New York City"], sent_starts=[True, False, True], ents=["O", "B-ENT", "I-ENT"])
eg = Example(pred, ref)
data = _compile_gold([eg], ["ner"], nlp, True)
assert data["boundary_cross_ents"] == 1
assert data["boundary_cross_ents"] == 1
def test_upload_download_local_file():
with make_tempdir() as d1, make_tempdir() as d2:
filename = "f.txt"
content = "content"
local_file = d1 / filename
remote_file = d2 / filename
with local_file.open(mode="w") as file_:
file_.write(content)
upload_file(local_file, remote_file)
local_file.unlink()
download_file(remote_file, local_file)
with local_file.open(mode="r") as file_:
assert file_.read() == content