mirror of
https://github.com/explosion/spaCy.git
synced 2025-01-26 09:14:32 +03:00
Add smart_open dependency to fetch project assets (#5812)
* Use smart_open for project assets
* Fix assets.py
* Update pyproject.toml
This commit is contained in:
parent
c288dba8e7
commit
520d25cb50
|
@ -8,6 +8,7 @@ requires = [
|
|||
"murmurhash>=0.28.0,<1.1.0",
|
||||
"thinc>=8.0.0a19,<8.0.0a30",
|
||||
"blis>=0.4.0,<0.5.0",
|
||||
"pytokenizations"
|
||||
"pytokenizations",
|
||||
"smart_open>=2.0.0,<3.0.0"
|
||||
]
|
||||
build-backend = "setuptools.build_meta"
|
||||
|
|
|
@ -15,6 +15,7 @@ requests>=2.13.0,<3.0.0
|
|||
tqdm>=4.38.0,<5.0.0
|
||||
pydantic>=1.3.0,<2.0.0
|
||||
pytokenizations
|
||||
smart_open>=2.0.0,<3.0.0
|
||||
# Official Python utilities
|
||||
setuptools
|
||||
packaging
|
||||
|
|
|
@ -52,6 +52,7 @@ install_requires =
|
|||
requests>=2.13.0,<3.0.0
|
||||
pydantic>=1.3.0,<2.0.0
|
||||
pytokenizations
|
||||
smart_open>=2.0.0,<3.0.0
|
||||
# Official Python utilities
|
||||
setuptools
|
||||
packaging
|
||||
|
|
|
@ -1,15 +1,17 @@
|
|||
from typing import Optional
|
||||
from pathlib import Path
|
||||
from wasabi import msg
|
||||
import requests
|
||||
import tqdm
|
||||
import re
|
||||
import shutil
|
||||
import requests
|
||||
import smart_open
|
||||
|
||||
from ...util import ensure_path, working_dir
|
||||
from .._util import project_cli, Arg, PROJECT_FILE, load_project_config, get_checksum
|
||||
|
||||
|
||||
|
||||
# TODO: find a solution for caches
|
||||
# CACHES = [
|
||||
# Path.home() / ".torch",
|
||||
|
@ -135,23 +137,12 @@ def convert_asset_url(url: str) -> str:
|
|||
|
||||
|
||||
def download_file(url: str, dest: Path, chunk_size: int = 1024) -> None:
    """Download a file using smart_open.

    The source is opened in binary mode and streamed to the destination in
    blocks of chunk_size bytes, so the whole payload is never held in memory
    at once.

    url (str): The URL of the file.
    dest (Path): The destination path.
    chunk_size (int): The size of chunks to read/write.
    """
    with smart_open.open(url, mode="rb") as input_file:
        with dest.open(mode="wb") as output_file:
            # Copy block by block until EOF instead of a single unbounded
            # read(), which would buffer the entire asset in memory and
            # ignore chunk_size.
            shutil.copyfileobj(input_file, output_file, length=chunk_size)
|
||||
|
|
Loading…
Reference in New Issue
Block a user