Add helper scripts for capture and CSV export
This commit is contained in:
parent
69e0fe6576
commit
1e9d55ada8
2 changed files with 157 additions and 0 deletions
106
tools/capture_page.py
Normal file
106
tools/capture_page.py
Normal file
|
|
@ -0,0 +1,106 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
from dataclasses import asdict, dataclass
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Literal
|
||||||
|
|
||||||
|
from playwright.sync_api import Error as PlaywrightError
|
||||||
|
from playwright.sync_api import sync_playwright
|
||||||
|
|
||||||
|
|
||||||
|
# Outcome tag stored in CaptureResult.status.
Status = Literal["ok", "error"]


@dataclass(frozen=True)
class CaptureResult:
    """Immutable record describing one capture attempt.

    The record is converted to a dict and printed as a single JSON line by
    the script's entry point, so the field names double as the JSON keys.
    """

    # URL that was requested on the command line.
    url: str
    # URL the page reported once loaded; None when the capture failed.
    final_url: str | None
    # Page title; None when the capture failed.
    title: str | None
    # ISO 8601 timestamp taken just before the capture started.
    captured_at: str
    # "ok" on success, "error" otherwise.
    status: Status
    # Text of the exception on failure; None on success.
    error: str | None
|
||||||
|
|
||||||
|
|
||||||
|
def iso_now() -> str:
    """Return the current time as an ISO 8601 string with second precision.

    The instant is read as an aware UTC datetime and then converted with
    ``astimezone()`` (no argument, i.e. the system's local time zone), so the
    resulting string carries an explicit UTC offset.
    """
    utc_moment = datetime.now(timezone.utc)
    local_moment = utc_moment.astimezone()
    return local_moment.isoformat(timespec="seconds")
|
||||||
|
|
||||||
|
|
||||||
|
def ensure_parent(path: Path) -> None:
    """Create the directory that will contain *path*.

    Missing ancestor directories are created as well, and an already
    existing directory is not an error.
    """
    directory = path.parent
    directory.mkdir(exist_ok=True, parents=True)
|
||||||
|
|
||||||
|
|
||||||
|
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the page-capture script.

    Returns:
        The namespace produced by argparse; ``--url`` and ``--screenshot``
        are mandatory, every other option has a default.
    """
    # (flag, add_argument keyword arguments), registered in this order.
    option_table = (
        ("--url", {"required": True}),
        ("--screenshot", {"required": True, "help": "Output PNG path"}),
        ("--html", {"default": None, "help": "Optional output HTML path"}),
        ("--full-page", {"action": "store_true", "help": "Capture full-page screenshot"}),
        ("--timeout-ms", {"type": int, "default": 45_000}),
        ("--wait-ms", {"type": int, "default": 1_000, "help": "Extra wait after DOMContentLoaded"}),
        ("--user-agent", {"default": None}),
        ("--locale", {"default": "fr-FR"}),
        ("--timezone", {"default": "Europe/Paris"}),
        ("--referer", {"default": None}),
    )

    cli = argparse.ArgumentParser(description="Capture a web page screenshot (and optional HTML).")
    for flag, options in option_table:
        cli.add_argument(flag, **options)
    return cli.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> int:
    """Capture the requested page and print the outcome as one JSON line.

    A headless Chromium page is opened on ``--url``; a screenshot is always
    written and, when ``--html`` is given, the page content is saved too.
    The outcome is emitted on file descriptor 1 as a JSON-encoded
    ``CaptureResult``.

    Returns:
        0 when the capture succeeded, 2 when it failed.
    """
    args = parse_args()

    screenshot_path = Path(args.screenshot)
    html_path = Path(args.html) if args.html else None

    captured_at = iso_now()
    try:
        # Output directories are created inside the try so that an OSError
        # here is reported through the JSON result like any other failure,
        # instead of escaping as an uncaught traceback.
        ensure_parent(screenshot_path)
        if html_path:
            ensure_parent(html_path)

        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                context = browser.new_context(
                    locale=args.locale,
                    timezone_id=args.timezone,
                    user_agent=args.user_agent,
                    # Only send a Referer header when one was supplied.
                    extra_http_headers={"Referer": args.referer} if args.referer else {},
                )
                page = context.new_page()
                page.goto(args.url, wait_until="domcontentloaded", timeout=args.timeout_ms)
                if args.wait_ms:
                    # Extra settle time after DOMContentLoaded.
                    page.wait_for_timeout(args.wait_ms)

                page.screenshot(path=str(screenshot_path), full_page=bool(args.full_page))
                if html_path:
                    html_path.write_text(page.content(), encoding="utf-8")

                result = CaptureResult(
                    url=args.url,
                    final_url=page.url,
                    title=page.title(),
                    captured_at=captured_at,
                    status="ok",
                    error=None,
                )
            finally:
                # Close the browser on the failure path as well.
                browser.close()
    except (PlaywrightError, OSError, ValueError) as exc:
        result = CaptureResult(
            url=args.url,
            final_url=None,
            title=None,
            captured_at=captured_at,
            status="error",
            error=str(exc),
        )

    # Raw UTF-8 bytes are written straight to fd 1, bypassing sys.stdout.
    os.write(1, (json.dumps(asdict(result), ensure_ascii=False) + "\n").encode("utf-8"))
    return 0 if result.status == "ok" else 2


if __name__ == "__main__":
    raise SystemExit(main())
|
||||||
|
|
||||||
51
tools/json_to_csv.py
Normal file
51
tools/json_to_csv.py
Normal file
|
|
@ -0,0 +1,51 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import argparse
|
||||||
|
import csv
|
||||||
|
import json
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
|
# CSV column order; each name is also the key read from an evidence record.
FIELDS = [
    "target", "source",
    "metric_name", "metric_value",
    "url", "captured_at",
    "status", "confidence",
    "notes", "screenshot_path",
]
|
||||||
|
|
||||||
|
|
||||||
|
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the JSON-to-CSV converter.

    Returns:
        The argparse namespace holding the mandatory ``--json`` input path
        and ``--csv`` output path.
    """
    cli = argparse.ArgumentParser(description="Convert evidence.json to evidence.csv.")
    path_options = (
        ("--json", "Path to evidence.json"),
        ("--csv", "Path to output evidence.csv"),
    )
    for flag, help_text in path_options:
        cli.add_argument(flag, required=True, help=help_text)
    return cli.parse_args()
|
||||||
|
|
||||||
|
|
||||||
|
def main() -> int:
    """Write the records under the JSON file's "evidence" key to a CSV file.

    The CSV gets one header row with the FIELDS columns followed by one row
    per evidence record. Keys outside FIELDS are dropped and keys missing
    from a record produce an empty cell.

    Returns:
        0 on success.

    Raises:
        ValueError: if the top-level JSON value is not an object.
    """
    args = parse_args()
    json_path = Path(args.json)
    csv_path = Path(args.csv)
    csv_path.parent.mkdir(parents=True, exist_ok=True)

    payload = json.loads(json_path.read_text(encoding="utf-8"))
    if not isinstance(payload, dict):
        raise ValueError(
            f"{json_path}: expected a JSON object with an 'evidence' list, "
            f"got {type(payload).__name__}"
        )
    # `or []` also covers an explicit `"evidence": null`, which `.get`
    # with a default would hand back as None and break the loop below.
    rows = payload.get("evidence") or []

    with csv_path.open("w", encoding="utf-8", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=FIELDS, extrasaction="ignore")
        writer.writeheader()
        for row in rows:
            writer.writerow({k: row.get(k) for k in FIELDS})

    return 0


if __name__ == "__main__":
    raise SystemExit(main())
|
||||||
|
|
||||||
Loading…
Add table
Reference in a new issue