Add smart_open dependency to fetch project assets (#5812)

* Use smart_open for project assets

* Fix assets.py

* Update pyproject.toml
This commit is contained in:
Matthew Honnibal 2020-07-26 12:15:00 +02:00 committed by GitHub
parent c288dba8e7
commit 520d25cb50
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 11 additions and 17 deletions

View File

@ -8,6 +8,7 @@ requires = [
"murmurhash>=0.28.0,<1.1.0", "murmurhash>=0.28.0,<1.1.0",
"thinc>=8.0.0a19,<8.0.0a30", "thinc>=8.0.0a19,<8.0.0a30",
"blis>=0.4.0,<0.5.0", "blis>=0.4.0,<0.5.0",
"pytokenizations" "pytokenizations",
"smart_open>=2.0.0,<3.0.0"
] ]
build-backend = "setuptools.build_meta" build-backend = "setuptools.build_meta"

View File

@ -15,6 +15,7 @@ requests>=2.13.0,<3.0.0
tqdm>=4.38.0,<5.0.0 tqdm>=4.38.0,<5.0.0
pydantic>=1.3.0,<2.0.0 pydantic>=1.3.0,<2.0.0
pytokenizations pytokenizations
smart_open>=2.0.0,<3.0.0
# Official Python utilities # Official Python utilities
setuptools setuptools
packaging packaging

View File

@ -52,6 +52,7 @@ install_requires =
requests>=2.13.0,<3.0.0 requests>=2.13.0,<3.0.0
pydantic>=1.3.0,<2.0.0 pydantic>=1.3.0,<2.0.0
pytokenizations pytokenizations
smart_open>=2.0.0,<3.0.0
# Official Python utilities # Official Python utilities
setuptools setuptools
packaging packaging

View File

@ -1,15 +1,17 @@
from typing import Optional from typing import Optional
from pathlib import Path from pathlib import Path
from wasabi import msg from wasabi import msg
import requests
import tqdm import tqdm
import re import re
import shutil import shutil
import requests
import smart_open
from ...util import ensure_path, working_dir from ...util import ensure_path, working_dir
from .._util import project_cli, Arg, PROJECT_FILE, load_project_config, get_checksum from .._util import project_cli, Arg, PROJECT_FILE, load_project_config, get_checksum
# TODO: find a solution for caches # TODO: find a solution for caches
# CACHES = [ # CACHES = [
# Path.home() / ".torch", # Path.home() / ".torch",
@ -135,23 +137,12 @@ def convert_asset_url(url: str) -> str:
def download_file(url: str, dest: Path, chunk_size: int = 1024) -> None:
    """Download a file using smart_open.

    The content is streamed to disk in chunks instead of being read into
    memory in one go, so large assets do not need to fit in RAM. The bytes
    are written exactly as served: extension-based decompression is turned
    off so that e.g. a .gz asset stays compressed on disk.

    url (str): The URL of the file.
    dest (Path): The destination path.
    chunk_size (int): The size of chunks to read/write.
    """
    # ignore_ext=True stops smart_open from transparently decompressing
    # based on the file extension, which would alter the stored bytes.
    with smart_open.open(url, mode="rb", ignore_ext=True) as input_file:
        with dest.open(mode="wb") as output_file:
            # Copy chunk_size bytes at a time rather than input_file.read().
            shutil.copyfileobj(input_file, output_file, chunk_size)