104 lines
3 KiB
Python
104 lines
3 KiB
Python
"""HTTP downloader helper."""
|
|
from __future__ import annotations

import hashlib
import os
from dataclasses import asdict, dataclass
from typing import Any, Dict, Optional

import requests
|
|
|
|
|
|
@dataclass
class DownloadResult:
    """Outcome of one download attempt, convertible to a manifest dictionary.

    Attributes:
        inventory_path: Caller-supplied value stored as-is; this module does
            not interpret it.
        document_name: Caller-supplied value stored as-is; this module does
            not interpret it.
        url: URL that was requested.
        local_path: Destination path the download was written (or was meant
            to be written) to.
        status: Outcome label. The helpers in this module produce
            ``"success"``, ``"requires_login"`` and ``"error"``.
        bytes: Number of bytes written to ``local_path`` (0 when unknown).
        sha256: Hex SHA-256 digest of the downloaded content ("" when unknown).
        notes: Free-form details, e.g. the error message of a failed attempt.
        http_status: HTTP status code of the response, or ``None`` when no
            response status is available.
    """

    inventory_path: str
    document_name: str
    url: str
    local_path: str
    status: str
    bytes: int = 0
    sha256: str = ""
    notes: str = ""
    http_status: Optional[int] = None

    def to_dict(self) -> Dict[str, Any]:
        """Return the fields as a plain dict, with ``url`` renamed ``url_used``.

        Values keep their field types (``bytes`` is an ``int`` and
        ``http_status`` is an ``int`` or ``None``), so the mapping is not
        string-only. The instance itself is left unchanged.
        """
        data = asdict(self)
        # Align field name with manifest expectation
        data["url_used"] = data.pop("url")
        return data
|
|
|
|
|
|
def download_file(url: str, out_path: str, timeout: int = 30) -> Dict[str, Any]:
    """Download a file over HTTP to ``out_path`` with SHA-256 integrity.

    The response body is streamed to disk in 32 KiB chunks while the digest
    and byte count are accumulated, so the whole payload is never held in
    memory at once.

    Args:
        url: Address to fetch with an HTTP GET.
        out_path: Destination file path. Its parent directory is created
            when the path has a directory component.
        timeout: Timeout in seconds handed to ``requests.get``.

    Returns:
        A manifest-friendly dictionary with the keys ``url``, ``local_path``,
        ``bytes`` (int), ``sha256`` (hex digest), ``status`` (always
        ``"success"``) and ``notes`` (always ``""``).

    Raises:
        requests.exceptions.HTTPError: If the response status is 4xx/5xx
            (raised by ``raise_for_status``).
        requests.exceptions.RequestException: For other failures reported by
            ``requests`` (connection, timeout, TLS, ...).
        OSError: If the directory or file cannot be created or written.

    Note:
        If the transfer fails after the file was opened, a partially written
        file is left at ``out_path``.
    """
    # Using the response as a context manager guarantees the streamed
    # connection is released even when the status check or a write fails.
    with requests.get(url, stream=True, timeout=timeout) as resp:
        resp.raise_for_status()

        # ``os.makedirs("")`` raises FileNotFoundError, so only create the
        # parent when ``out_path`` actually contains a directory component.
        parent_dir = os.path.dirname(out_path)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        h = hashlib.sha256()
        bytes_written = 0
        with open(out_path, "wb") as f:
            for chunk in resp.iter_content(32768):
                if chunk:  # skip empty chunks
                    f.write(chunk)
                    h.update(chunk)
                    bytes_written += len(chunk)

    return {
        "url": url,
        "local_path": out_path,
        "bytes": bytes_written,
        "sha256": h.hexdigest(),
        "status": "success",
        "notes": "",
    }
|
|
|
|
|
|
def safe_http_download(
    url: str, out_path: str, inventory_path: str, document_name: str
) -> DownloadResult:
    """Download ``url`` to ``out_path`` and report the outcome as a result object.

    Wraps ``download_file`` and converts ``requests`` failures into a
    ``DownloadResult`` instead of letting them propagate.

    Args:
        url: Address to download.
        out_path: Destination file path.
        inventory_path: Stored unchanged on the returned result.
        document_name: Stored unchanged on the returned result.

    Returns:
        A ``DownloadResult`` whose ``status`` is:

        * ``"success"`` when the download completed,
        * ``"requires_login"`` when the server answered 401 or 403,
        * ``"error"`` for any other HTTP, SSL or request-level failure
          (details are placed in ``notes``).

    Note:
        Only ``requests`` exceptions are converted. Other exceptions raised
        by ``download_file`` (for example a file-system ``OSError``) still
        propagate to the caller.
    """
    try:
        info = download_file(url, out_path)
    except requests.exceptions.HTTPError as exc:
        response = exc.response
        # Compare against None explicitly: a ``requests.Response`` is falsy
        # for every 4xx/5xx status, so a truthiness test would always drop
        # the status code and never detect 401/403.
        status_code = response.status_code if response is not None else None
        status = "requires_login" if status_code in {401, 403} else "error"
        return DownloadResult(
            inventory_path=inventory_path,
            document_name=document_name,
            url=url,
            local_path=out_path,
            status=status,
            notes=f"HTTP error: {exc}",
            http_status=status_code,
        )
    except requests.exceptions.SSLError as exc:
        # Must precede the RequestException handler, which would also match.
        return DownloadResult(
            inventory_path=inventory_path,
            document_name=document_name,
            url=url,
            local_path=out_path,
            status="error",
            notes=f"SSL error: {exc}",
        )
    except requests.exceptions.RequestException as exc:
        return DownloadResult(
            inventory_path=inventory_path,
            document_name=document_name,
            url=url,
            local_path=out_path,
            status="error",
            notes=f"Request error: {exc}",
        )

    return DownloadResult(
        inventory_path=inventory_path,
        document_name=document_name,
        url=url,
        local_path=out_path,
        status="success",
        bytes=info.get("bytes", 0),
        sha256=info.get("sha256", ""),
        notes=info.get("notes", ""),
        # download_file does not report the actual status code, so 200 is
        # recorded for every successful download.
        http_status=200,
    )
|
|
|
|
|
|
# Names exported by ``from <module> import *``.
__all__ = [
    "download_file",
    "safe_http_download",
    "DownloadResult",
]
|
|
|