#!/usr/bin/env python3
"""Build static IF.emotion evidence pages from hosted trace bundles.

The script looks for ``emo_trace_payload_*.tar.gz`` bundles in the directory
above the one containing this file, reads a few JSON/markdown members from
each bundle, and writes one ``trace_<id>.html`` page per bundle plus an
``index.html`` next to this file.

NOTE(review): the HTML markup and inline CSS emitted by this module were
reconstructed from a copy whose tags had been stripped. The visible text,
escaping and data flow are preserved; element names, classes and styling
should be confirmed against the deployed pages.
"""
from __future__ import annotations

import datetime as dt
import hashlib
import html
import json
import string
import tarfile
from dataclasses import dataclass
from pathlib import Path
from typing import Any
from urllib.parse import quote

STATIC_HOSTED_BASE_URL = "https://infrafabric.io/static/hosted"

_BUNDLE_PREFIX = "emo_trace_payload_"
_BUNDLE_SUFFIX = ".tar.gz"
_HEX_DIGITS = frozenset(string.hexdigits)
_SAFE_FILENAME_CHARS = frozenset(string.ascii_letters + string.digits + "._-")
_REDACTED = "[REDACTED]"

# NOTE(review): minimal stylesheet; the original CSS was not recoverable.
_PAGE_STYLE = """
body { font-family: system-ui, sans-serif; margin: 2rem auto; max-width: 60rem; padding: 0 1rem; }
pre { white-space: pre-wrap; word-break: break-word; background: #f5f5f5; padding: 0.75rem; }
table { border-collapse: collapse; width: 100%; }
th, td { border: 1px solid #ccc; padding: 0.3rem 0.5rem; text-align: left; vertical-align: top; }
dt { font-weight: 600; margin-top: 0.5rem; }
dd { margin-left: 0; }
.sub { color: #555; }
.badge { background: #b45309; color: #fff; border-radius: 0.25rem; padding: 0 0.4rem; font-size: 0.8em; }
"""


@dataclass(frozen=True)
class TraceDisplay:
    """Per-trace presentation override for the public pages."""

    # Title shown in the index instead of the (truncated) user text.
    title: str
    # When set, the captured user request is replaced by a placeholder.
    redact_user_text: bool = False
    # When set, the captured model output is replaced by a placeholder.
    redact_assistant_text: bool = False
    # When set, the IF.STORY narrative is replaced by a placeholder.
    redact_if_story: bool = False


TRACE_DISPLAY_OVERRIDES: dict[str, TraceDisplay] = {
    # Synthetic self-harm prompt demo: keep the bundle downloadable for auditors,
    # but do not render the prompt/answer inline on the public page.
    "96700e8e-6a83-445e-86f7-06905c500146": TraceDisplay(
        title="Synthetic self-harm safety prompt (redacted)",
        redact_user_text=True,
        redact_assistant_text=True,
        redact_if_story=True,
    ),
}


def _sha256_file(path: Path) -> str:
    """Return the hex SHA-256 digest of *path*, read in 1 MiB chunks."""
    digest = hashlib.sha256()
    with path.open("rb") as file_handle:
        for chunk in iter(lambda: file_handle.read(1024 * 1024), b""):
            digest.update(chunk)
    return digest.hexdigest()


def _read_sha256_sidecar(file_path: Path) -> str | None:
    """Return the digest stored in ``<file_path>.sha256``, if it is valid.

    Only the first whitespace-separated field of the sidecar is considered.
    It must be exactly 64 hexadecimal characters; anything else (including a
    missing or empty sidecar) yields ``None``. The digest is lower-cased.
    """
    sidecar = file_path.with_name(file_path.name + ".sha256")
    try:
        content = sidecar.read_text(encoding="utf-8", errors="replace")
    except FileNotFoundError:
        return None
    fields = content.split()
    if not fields:
        return None
    candidate = fields[0]
    # A character-set check is stricter than int(candidate, 16), which would
    # also accept "0x" prefixes and "_" separators.
    if len(candidate) != 64 or not set(candidate) <= _HEX_DIGITS:
        return None
    return candidate.lower()


def _tar_read_text(tf: tarfile.TarFile, member: str) -> str | None:
    """Return *member* of *tf* decoded as UTF-8, or ``None`` if unavailable.

    ``None`` is returned when the member does not exist or is not a regular
    file. Undecodable bytes are replaced rather than raising.
    """
    try:
        extracted = tf.extractfile(member)
    except KeyError:
        return None
    if extracted is None:
        return None
    with extracted:
        return extracted.read().decode("utf-8", errors="replace")


def _tar_read_json(tf: tarfile.TarFile, member: str) -> Any | None:
    """Return *member* of *tf* parsed as JSON, or ``None`` if it is missing.

    Raises:
        json.JSONDecodeError: if the member exists but is not valid JSON.
    """
    raw = _tar_read_text(tf, member)
    if raw is None:
        return None
    return json.loads(raw)


def _escape_pre(text: str) -> str:
    """Escape *text* for element content (``&``, ``<``, ``>``; quotes kept)."""
    return html.escape(text, quote=False)


def _truncate(text: str, max_chars: int) -> str:
    """Shorten *text* to at most *max_chars* characters, ending in "…".

    Text that already fits is returned unchanged. For ``max_chars <= 0`` a
    non-empty text collapses to the single ellipsis character.
    """
    if len(text) <= max_chars:
        return text
    return text[: max(0, max_chars - 1)] + "…"


def _render_page(title: str, body_html: str) -> str:
    """Wrap *body_html* in the shared page skeleton.

    *title* is escaped here; *body_html* is inserted verbatim and must
    already be safe HTML.
    """
    safe_title = html.escape(title)
    return f"""<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>{safe_title}</title>
<style>{_PAGE_STYLE}</style>
</head>
<body>
<h1>{safe_title}</h1>
<p class="sub">Public, static evidence pages built from IF.emotion trace bundles (no auth; no live API calls).</p>
{body_html}
</body>
</html>
"""


def _discover_bundles(hosted_root: Path) -> list[Path]:
    """Return the trace bundle files directly inside *hosted_root*, sorted."""
    bundles = sorted(hosted_root.glob(f"{_BUNDLE_PREFIX}*{_BUNDLE_SUFFIX}"))
    return [p for p in bundles if p.is_file()]


def _bundle_trace_id(bundle_name: str) -> str:
    """Derive a trace id from a bundle file name (prefix and ``.tar.gz`` removed)."""
    name = bundle_name
    # Path.stem would only drop ".gz" and leave a stray ".tar" behind.
    if name.endswith(_BUNDLE_SUFFIX):
        name = name[: -len(_BUNDLE_SUFFIX)]
    if name.startswith(_BUNDLE_PREFIX):
        name = name[len(_BUNDLE_PREFIX):]
    return name


def _as_dict(value: Any) -> dict[str, Any]:
    """Return *value* if it is a dict, else an empty dict."""
    return value if isinstance(value, dict) else {}


def _load_trace_summary(bundle_path: Path) -> dict[str, Any]:
    """Read the fields needed for rendering from one trace bundle.

    Missing members or fields fall back to empty values. When the payload
    carries no ``trace_id`` the id is derived from the bundle file name.
    Text fields are coerced to ``str`` so rendering never sees other types;
    ``ts_utc`` is passed through unchanged.
    """
    with tarfile.open(bundle_path, "r:gz") as tf:
        trace_payload = _as_dict(_tar_read_json(tf, "payload/trace_payload.json"))
        ttt_children = _as_dict(_tar_read_json(tf, "payload/ttt_children.json"))
        if_story = _tar_read_text(tf, "payload/if_story.md")

    trace_id = str(trace_payload.get("trace_id") or "").strip()
    if not trace_id:
        trace_id = _bundle_trace_id(bundle_path.name)
    children = ttt_children.get("children")
    return {
        "trace_id": trace_id,
        "ts_utc": trace_payload.get("ts_utc"),
        "user_text": str(trace_payload.get("user_text") or ""),
        "assistant_text": str(trace_payload.get("assistant_text") or ""),
        "model": str(trace_payload.get("model") or ""),
        "provider": str(trace_payload.get("provider") or ""),
        "ttt_children": children if isinstance(children, list) else [],
        "if_story": if_story or "",
    }


def _trace_page_filename(trace_id: str) -> str:
    """Return the output file name for a trace page.

    The trace id originates from bundle content, so every character outside
    ``[A-Za-z0-9._-]`` is replaced to keep the result a single path component.
    """
    safe_id = "".join(ch if ch in _SAFE_FILENAME_CHARS else "_" for ch in trace_id)
    return f"trace_{safe_id}.html"


def _verification_commands(tar_url: str, verifier_url: str, sha256: str) -> str:
    """Return the shell snippet shown under "Independent verification"."""
    return (
        "# Download the evidence bundle\n"
        f"curl -fsSL -o emo.tar.gz '{tar_url}'\n"
        "\n"
        "# Verify transport integrity\n"
        "sha256sum emo.tar.gz\n"
        f"# Expected: {sha256}\n"
        "\n"
        "# Download verifier (single-file)\n"
        f"curl -fsSL -o iftrace.py '{verifier_url}'\n"
        f"python3 iftrace.py verify emo.tar.gz --expected-sha256 {sha256}\n"
    )


def _render_child_table(children: list[Any]) -> str:
    """Render the TTT child artifacts as an HTML table (non-dict entries skipped)."""
    child_rows = []
    for child in children:
        if not isinstance(child, dict):
            continue
        rel_path = str(child.get("rel_path") or "")
        child_sha256 = str(child.get("sha256") or "")
        citation_id = str(child.get("citation_id") or "")
        pq = str(child.get("pq_status") or "")
        child_rows.append(
            "<tr>"
            f"<td><code>{html.escape(rel_path)}</code></td>"
            f"<td><code>{html.escape(child_sha256)}</code></td>"
            f"<td><code>{html.escape(citation_id)}</code></td>"
            f"<td>{html.escape(pq or 'unknown')}</td>"
            "</tr>"
        )
    rows_html = "\n".join(child_rows) if child_rows else '<tr><td colspan="4">(none)</td></tr>'
    return (
        "<table>"
        "<thead><tr><th>Artifact</th><th>SHA256</th><th>if:// citation</th><th>PQ status</th></tr></thead>"
        "<tbody>"
        + rows_html
        + "</tbody></table>"
    )


def _render_index_row(row: dict[str, Any], override: TraceDisplay | None, page_name: str) -> str:
    """Render one ``<tr>`` of the index table, linking to *page_name*."""
    trace_id = str(row["trace_id"])
    if override:
        title = override.title
    else:
        title = _truncate(str(row.get("user_text") or trace_id), 80) or trace_id
    return (
        "<tr>"
        f'<td><a href="{html.escape(quote(page_name))}">{html.escape(title)}</a></td>'
        f"<td><code>{html.escape(trace_id)}</code></td>"
        f"<td>{html.escape(str(row.get('provider') or 'unknown'))}</td>"
        f"<td>{html.escape(str(row.get('model') or 'unknown'))}</td>"
        # ts_utc is taken from the bundle as-is and may not be a string.
        f"<td>{html.escape(str(row.get('ts_utc') or ''))}</td>"
        "</tr>"
    )


def _render_trace_body(row: dict[str, Any], override: TraceDisplay | None) -> str:
    """Render the body of a single trace page, applying *override* redactions."""
    trace_id = str(row["trace_id"])
    bundle_name = str(row["bundle_name"])
    sha256 = str(row["bundle_sha256"])

    # Percent-encode the file name for the URL; HTML escaping happens once,
    # at the point where the URL is written into the page.
    bundle_url_name = quote(bundle_name)
    tar_url = f"{STATIC_HOSTED_BASE_URL}/{bundle_url_name}"
    verifier_url = f"{STATIC_HOSTED_BASE_URL}/iftrace.py"
    sha_sidecar_url = f"{STATIC_HOSTED_BASE_URL}/{bundle_url_name}.sha256"
    verification_pre = _escape_pre(_verification_commands(tar_url, verifier_url, sha256))

    user_text = str(row.get("user_text") or "")
    assistant_text = str(row.get("assistant_text") or "")
    if_story = str(row.get("if_story") or "")
    warn_badge = ""
    if override:
        if override.redact_user_text:
            user_text = _REDACTED
        if override.redact_assistant_text:
            assistant_text = _REDACTED
        if override.redact_if_story:
            if_story = _REDACTED
        if override.redact_user_text or override.redact_assistant_text or override.redact_if_story:
            warn_badge = '<span class="badge">redacted</span>'

    children = row.get("ttt_children")
    child_table = _render_child_table(children if isinstance(children, list) else [])

    safe_ts = html.escape(str(row.get("ts_utc") or ""))
    safe_provider = html.escape(str(row.get("provider") or ""))
    safe_model = html.escape(str(row.get("model") or ""))
    safe_bundle = html.escape(bundle_name)

    return f"""<dl>
<dt>Trace ID</dt>
<dd><code>{html.escape(trace_id)}</code> {warn_badge}</dd>
<dt>Timestamp (UTC)</dt>
<dd>{safe_ts}</dd>
<dt>Provider</dt>
<dd>{safe_provider}</dd>
<dt>Model</dt>
<dd>{safe_model}</dd>
<dt>Bundle</dt>
<dd><a href="{html.escape(tar_url)}">{safe_bundle}</a> (SHA256: <code>{html.escape(sha256)}</code>)</dd>
<dt>SHA sidecar</dt>
<dd><a href="{html.escape(sha_sidecar_url)}">{safe_bundle}.sha256</a></dd>
</dl>

<h2>Independent verification</h2>

<pre>{verification_pre}</pre>
<p class="sub">These pages are static. Verification does not require logging into this server.</p>

<h2>User request (captured)</h2>

<pre>{_escape_pre(user_text)}</pre>

<h2>Model output (captured)</h2>

<pre>{_escape_pre(assistant_text)}</pre>

<h2>TTT child artifacts</h2>

{child_table}

<h2>IF.STORY (human-readable narrative)</h2>

<p class="sub">IF.STORY is a projection/view; the tarball contains the raw JSONL/JSON artifacts for evidence-grade verification.</p>
<pre>{_escape_pre(if_story)}</pre>
<p><a href="index.html">← Back to evidence index</a></p>
"""


def _render_index_body(built_at: str, index_items_html: list[str]) -> str:
    """Render the body of the index page from pre-rendered table rows."""
    verifier_url = html.escape(f"{STATIC_HOSTED_BASE_URL}/iftrace.py")
    # NOTE(review): link target assumed to be the hosted base directory.
    listing_url = html.escape(f"{STATIC_HOSTED_BASE_URL}/")
    rows_html = "".join(index_items_html)
    # TODO(review): the whitepaper link target was not recoverable; the label
    # is rendered as plain text until the URL is restored.
    return f"""<p class="sub">Built: {html.escape(built_at)}</p>

<p>This is a public index of trace bundles that can be independently verified via SHA256 + the included <code>iftrace.py</code> verifier. It exists to close the “verifiability gap” for external reviewers.</p>

<p>Key docs: Trace protocol whitepaper · <a href="{verifier_url}">Verifier</a> · <a href="{listing_url}">Static directory listing</a></p>

<h2>Traces</h2>

<table>
<thead><tr><th>Summary</th><th>Trace ID</th><th>Provider</th><th>Model</th><th>Timestamp</th></tr></thead>
<tbody>
{rows_html}
</tbody>
</table>
"""


def main() -> int:
    """Generate all trace pages and the index page; return the exit status.

    Bundles are read from the parent of this file's directory and pages are
    written into this file's directory. Traces are listed newest first,
    ordered by the string form of ``ts_utc``.

    Raises:
        SystemExit: if no bundle is found.
    """
    evidence_dir = Path(__file__).resolve().parent
    hosted_root = evidence_dir.parent

    bundles = _discover_bundles(hosted_root)
    if not bundles:
        raise SystemExit("No emo_trace_payload_*.tar.gz bundles found in hosted repo root.")

    built_at = dt.datetime.now(tz=dt.timezone.utc).strftime("%Y-%m-%d %H:%M:%SZ")

    trace_rows: list[dict[str, Any]] = []
    for bundle_path in bundles:
        summary = _load_trace_summary(bundle_path)
        summary["bundle_name"] = bundle_path.name
        # Prefer the published sidecar digest; hash the file only without one.
        summary["bundle_sha256"] = _read_sha256_sidecar(bundle_path) or _sha256_file(bundle_path)
        trace_rows.append(summary)

    trace_rows.sort(key=lambda row: str(row.get("ts_utc") or ""), reverse=True)

    # Build per-trace pages.
    index_items_html: list[str] = []
    for row in trace_rows:
        trace_id = row["trace_id"]
        override = TRACE_DISPLAY_OVERRIDES.get(trace_id)
        page_name = _trace_page_filename(trace_id)
        index_items_html.append(_render_index_row(row, override, page_name))

        page = _render_page(
            f"IF.emotion Evidence Trace — {trace_id}",
            _render_trace_body(row, override),
        )
        (evidence_dir / page_name).write_text(page, encoding="utf-8")

    # Build index page.
    index_body = _render_index_body(built_at, index_items_html)
    (evidence_dir / "index.html").write_text(
        _render_page("IF.emotion Evidence Index", index_body),
        encoding="utf-8",
    )
    return 0


if __name__ == "__main__":
    raise SystemExit(main())