#!/usr/bin/env python3 from __future__ import annotations import argparse import json import os from dataclasses import asdict, dataclass from datetime import datetime, timezone from pathlib import Path from typing import Any, Literal from playwright.sync_api import Error as PlaywrightError from playwright.sync_api import sync_playwright Status = Literal["ok", "error"] @dataclass(frozen=True) class CaptureResult: url: str final_url: str | None title: str | None captured_at: str status: Status error: str | None def iso_now() -> str: return datetime.now(timezone.utc).astimezone().isoformat(timespec="seconds") def ensure_parent(path: Path) -> None: path.parent.mkdir(parents=True, exist_ok=True) def parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser(description="Capture a web page screenshot (and optional HTML).") parser.add_argument("--url", required=True) parser.add_argument("--screenshot", required=True, help="Output PNG path") parser.add_argument("--html", default=None, help="Optional output HTML path") parser.add_argument("--full-page", action="store_true", help="Capture full-page screenshot") parser.add_argument("--timeout-ms", type=int, default=45_000) parser.add_argument("--wait-ms", type=int, default=1_000, help="Extra wait after DOMContentLoaded") parser.add_argument("--user-agent", default=None) parser.add_argument("--locale", default="fr-FR") parser.add_argument("--timezone", default="Europe/Paris") parser.add_argument("--referer", default=None) return parser.parse_args() def main() -> int: args = parse_args() screenshot_path = Path(args.screenshot) html_path = Path(args.html) if args.html else None ensure_parent(screenshot_path) if html_path: ensure_parent(html_path) captured_at = iso_now() try: with sync_playwright() as p: browser = p.chromium.launch(headless=True) context = browser.new_context( locale=args.locale, timezone_id=args.timezone, user_agent=args.user_agent, extra_http_headers={k: v for k, v in {"Referer": args.referer}.items() if v}, ) page = context.new_page() page.goto(args.url, wait_until="domcontentloaded", timeout=args.timeout_ms) if args.wait_ms: page.wait_for_timeout(args.wait_ms) page.screenshot(path=str(screenshot_path), full_page=bool(args.full_page)) if html_path: html_path.write_text(page.content(), encoding="utf-8") result = CaptureResult( url=args.url, final_url=page.url, title=page.title(), captured_at=captured_at, status="ok", error=None, ) browser.close() except (PlaywrightError, OSError, ValueError) as exc: result = CaptureResult( url=args.url, final_url=None, title=None, captured_at=captured_at, status="error", error=str(exc), ) os.write(1, (json.dumps(asdict(result), ensure_ascii=False) + "\n").encode("utf-8")) return 0 if result.status == "ok" else 2 if __name__ == "__main__": raise SystemExit(main())