hosted/evidence/build_trace_pages.py

428 lines
13 KiB
Python
Executable file

#!/usr/bin/env python3
from __future__ import annotations
import datetime as dt
import hashlib
import html
import json
import tarfile
from dataclasses import dataclass
from pathlib import Path
from typing import Any
# Public base URL used to build the links the generated pages point at:
# the bundle tarballs, their .sha256 sidecars, the iftrace.py verifier,
# and the whitepaper / directory listing on the index page.
STATIC_HOSTED_BASE_URL = "https://infrafabric.io/static/hosted"
@dataclass(frozen=True)
class TraceDisplay:
    """Per-trace presentation override for the generated evidence pages.

    Instances are frozen (immutable). The redaction flags only control what
    is rendered inline in the HTML; they do not touch the bundle itself.
    """

    # Link text shown for the trace in the index table, used instead of the
    # default (a truncated copy of the captured user text).
    title: str
    # When True, the captured user request is rendered as "[REDACTED]".
    redact_user_text: bool = False
    # When True, the captured model output is rendered as "[REDACTED]".
    redact_assistant_text: bool = False
    # When True, the IF.STORY narrative is rendered as "[REDACTED]".
    redact_if_story: bool = False
# Synthetic self-harm prompt demo. The bundle stays downloadable for auditors,
# but neither the prompt nor the answer is rendered inline on the public page.
_SELF_HARM_DEMO_TRACE_ID = "96700e8e-6a83-445e-86f7-06905c500146"

# Display overrides keyed by trace ID; traces without an entry are rendered
# with their captured text and an automatically derived title.
TRACE_DISPLAY_OVERRIDES: dict[str, TraceDisplay] = {
    _SELF_HARM_DEMO_TRACE_ID: TraceDisplay(
        title="Synthetic self-harm safety prompt (redacted)",
        redact_user_text=True,
        redact_assistant_text=True,
        redact_if_story=True,
    ),
}
def _sha256_file(path: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the file at *path*.

    The file is consumed in 1 MiB pieces so that it is never held in memory
    as a whole.
    """
    chunk_size = 1024 * 1024
    hasher = hashlib.sha256()
    with path.open("rb") as stream:
        while True:
            piece = stream.read(chunk_size)
            if not piece:
                break
            hasher.update(piece)
    return hasher.hexdigest()
def _read_sha256_sidecar(file_path: Path) -> str | None:
    """Return the digest stored in the ``<name>.sha256`` sidecar of *file_path*.

    The sidecar lives next to *file_path* and carries the digest as its first
    whitespace-separated token; anything after that token is ignored.

    Returns:
        The digest as 64 lowercase hexadecimal characters, or ``None`` when
        the sidecar is missing, is not a regular file, is empty, or its first
        token is not a well-formed SHA-256 hex digest.
    """
    sidecar = file_path.with_name(file_path.name + ".sha256")
    # is_file() (rather than exists()) also rejects a directory that happens
    # to carry the sidecar name, which read_text() could not open.
    if not sidecar.is_file():
        return None
    tokens = sidecar.read_text(encoding="utf-8", errors="replace").split()
    if not tokens:
        return None
    candidate = tokens[0]
    # Validate character by character: int(candidate, 16) would also accept
    # "0x" prefixes, signs, underscores and non-ASCII digits, none of which
    # can appear in a real digest.
    hex_digits = "0123456789abcdefABCDEF"
    if len(candidate) != 64 or any(ch not in hex_digits for ch in candidate):
        return None
    return candidate.lower()
def _tar_read_text(tf: tarfile.TarFile, member: str) -> str | None:
    """Read archive member *member* from *tf* and return it as text.

    Args:
        tf: An open tar archive.
        member: Name of the member inside the archive.

    Returns:
        The member's content decoded as UTF-8 (undecodable bytes become
        U+FFFD), or ``None`` when the member does not exist or
        ``extractfile`` yields no readable stream for it (for example a
        directory entry).
    """
    try:
        extracted = tf.extractfile(member)
    except KeyError:
        # TarFile raises KeyError for names that are not in the archive.
        return None
    if extracted is None:
        return None
    # Close the member reader once its content has been consumed.
    with extracted:
        return extracted.read().decode("utf-8", errors="replace")
def _tar_read_json(tf: tarfile.TarFile, member: str) -> Any | None:
    """Parse archive member *member* of *tf* as JSON.

    Returns ``None`` when ``_tar_read_text`` cannot provide the member's
    text; otherwise returns whatever ``json.loads`` produces for it.
    Malformed JSON propagates the decoder's exception.
    """
    text = _tar_read_text(tf, member)
    return None if text is None else json.loads(text)
def _escape_pre(text: str) -> str:
    """Escape *text* for use as element content, e.g. inside ``<pre>``.

    Only ``&``, ``<`` and ``>`` are replaced; quote characters are left
    untouched.
    """
    escaped = html.escape(text, quote=False)
    return escaped
def _truncate(text: str, max_chars: int) -> str:
    """Shorten *text* to at most *max_chars* characters.

    Text that already fits is returned unchanged. Longer text is cut to
    ``max_chars - 1`` characters and terminated with a single ellipsis
    character so the reader can tell it was shortened.

    Args:
        text: The string to shorten.
        max_chars: Maximum length of the result, ellipsis included.

    Returns:
        The possibly shortened string; ``""`` when *text* does not fit and
        *max_chars* is zero or negative.
    """
    if len(text) <= max_chars:
        return text
    if max_chars <= 0:
        # No room even for the ellipsis.
        return ""
    # One character of the budget is reserved for the ellipsis marker.
    return text[: max_chars - 1] + "\u2026"
def _render_page(title: str, body_html: str) -> str:
    """Wrap *body_html* in the shared HTML page shell and return the document.

    Args:
        title: Plain-text page title. It is HTML-escaped here and used for
            both the ``<title>`` element and the ``<h1>`` heading.
        body_html: Markup placed inside ``<main>``. It is inserted as-is
            (no escaping happens here), so it must already be safe HTML.

    Returns:
        The complete HTML document, including the inline stylesheet.
    """
    safe_title = html.escape(title)
    # The template is an f-string, so literal CSS braces are written doubled.
    return f"""<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<title>{safe_title}</title>
<style>
:root {{
--bg: #fffdf7;
--panel: #ffffff;
--text: #1f2937;
--muted: #6b7280;
--border: #e5e7eb;
--code: #0b1020;
--link: #1d4ed8;
}}
body {{
margin: 0;
font-family: ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, Helvetica, Arial, "Apple Color Emoji",
"Segoe UI Emoji";
background: var(--bg);
color: var(--text);
line-height: 1.45;
}}
header {{
padding: 20px 18px 10px;
border-bottom: 1px solid var(--border);
background: #fff9e6;
}}
header h1 {{
margin: 0;
font-size: 18px;
font-weight: 700;
letter-spacing: 0.2px;
}}
header .sub {{
margin-top: 4px;
color: var(--muted);
font-size: 13px;
}}
main {{
max-width: 980px;
margin: 0 auto;
padding: 18px;
}}
a {{
color: var(--link);
text-decoration: none;
}}
a:hover {{
text-decoration: underline;
}}
.card {{
background: var(--panel);
border: 1px solid var(--border);
border-radius: 12px;
padding: 14px 14px;
margin: 12px 0;
box-shadow: 0 1px 0 rgba(0,0,0,0.02);
}}
.meta {{
display: grid;
grid-template-columns: 160px 1fr;
gap: 6px 12px;
font-size: 13px;
}}
.meta .k {{
color: var(--muted);
}}
pre {{
background: #0b1020;
color: #e5e7eb;
padding: 12px;
border-radius: 10px;
overflow: auto;
font-size: 12px;
line-height: 1.4;
}}
code {{
background: #f3f4f6;
padding: 2px 6px;
border-radius: 6px;
font-size: 12px;
}}
table {{
width: 100%;
border-collapse: collapse;
font-size: 13px;
}}
th, td {{
text-align: left;
border-bottom: 1px solid var(--border);
padding: 8px 6px;
vertical-align: top;
}}
th {{
color: var(--muted);
font-weight: 600;
}}
.badge {{
display: inline-block;
padding: 2px 8px;
border-radius: 999px;
border: 1px solid var(--border);
color: var(--muted);
font-size: 12px;
white-space: nowrap;
}}
.warn {{
border-color: #f59e0b;
color: #92400e;
background: #fff7ed;
}}
</style>
</head>
<body>
<header>
<h1>{safe_title}</h1>
<div class="sub">Public, static evidence pages built from IF.emotion trace bundles (no auth; no live API calls).</div>
</header>
<main>
{body_html}
</main>
</body>
</html>
"""
def _discover_bundles(hosted_root: Path) -> list[Path]:
    """List the trace bundles located directly inside *hosted_root*.

    A bundle is a regular file whose name matches
    ``emo_trace_payload_*.tar.gz``; directories with a matching name are
    ignored. The result is sorted by path.
    """
    matches = hosted_root.glob("emo_trace_payload_*.tar.gz")
    return sorted(candidate for candidate in matches if candidate.is_file())
def _load_trace_summary(bundle_path: Path) -> dict[str, Any]:
    """Extract the fields needed to render one trace from its bundle.

    Reads ``payload/trace_payload.json``, ``payload/ttt_children.json`` and
    ``payload/if_story.md`` from the gzip-compressed tarball at
    *bundle_path*. Members that are missing, or JSON documents that are not
    objects, are treated as empty.

    Returns:
        A dict with the keys ``trace_id``, ``ts_utc``, ``user_text``,
        ``assistant_text``, ``model``, ``provider``, ``ttt_children`` and
        ``if_story``. When the payload carries no trace ID, one is derived
        from the bundle file name.
    """
    with tarfile.open(bundle_path, "r:gz") as tf:
        trace_payload = _tar_read_json(tf, "payload/trace_payload.json")
        ttt_children = _tar_read_json(tf, "payload/ttt_children.json")
        if_story = _tar_read_text(tf, "payload/if_story.md")

    # Anything other than a JSON object (missing member, list, scalar) has
    # no usable keys; fall back to an empty mapping instead of failing on .get.
    if not isinstance(trace_payload, dict):
        trace_payload = {}
    if not isinstance(ttt_children, dict):
        ttt_children = {}

    trace_id = str(trace_payload.get("trace_id") or "").strip()
    if not trace_id:
        # Path.stem would only drop ".gz" and leave a trailing ".tar" in the
        # derived ID, so strip the full ".tar.gz" extension explicitly.
        base_name = bundle_path.name
        if base_name.endswith(".tar.gz"):
            base_name = base_name[: -len(".tar.gz")]
        trace_id = base_name.replace("emo_trace_payload_", "")

    return {
        "trace_id": trace_id,
        "ts_utc": trace_payload.get("ts_utc"),
        "user_text": trace_payload.get("user_text") or "",
        "assistant_text": trace_payload.get("assistant_text") or "",
        "model": trace_payload.get("model") or "",
        "provider": trace_payload.get("provider") or "",
        "ttt_children": ttt_children.get("children") or [],
        "if_story": if_story or "",
    }
def _child_table_html(children: Any) -> str:
    """Render the "TTT child artifacts" table for one trace.

    Every value is coerced to ``str`` and HTML-escaped. Entries that are not
    JSON objects are skipped, and a single "(none)" row is emitted when no
    usable entry remains.
    """
    rows: list[str] = []
    for child in children if isinstance(children, list) else []:
        if not isinstance(child, dict):
            continue
        rel_path = str(child.get("rel_path") or "")
        child_sha256 = str(child.get("sha256") or "")
        citation_id = str(child.get("citation_id") or "")
        pq = str(child.get("pq_status") or "")
        rows.append(
            "<tr>"
            f"<td><code>{html.escape(rel_path)}</code></td>"
            f"<td><code>{html.escape(child_sha256)}</code></td>"
            f"<td><code>{html.escape(citation_id)}</code></td>"
            f"<td><span class=\"badge\">{html.escape(pq or 'unknown')}</span></td>"
            "</tr>"
        )
    tbody = "\n".join(rows) if rows else "<tr><td colspan=\"4\">(none)</td></tr>"
    return (
        "<table>"
        "<thead><tr><th>Artifact</th><th>SHA256</th><th>if:// citation</th><th>PQ status</th></tr></thead>"
        "<tbody>" + tbody + "</tbody></table>"
    )


def main() -> int:
    """Build the static evidence pages next to this script.

    Discovers the trace bundles in the parent directory of this script's
    directory, writes one ``trace_<trace_id>.html`` page per bundle and an
    ``index.html`` listing all of them (newest timestamp first).

    Returns:
        ``0`` on success.

    Raises:
        SystemExit: If no bundle is found.
    """
    evidence_dir = Path(__file__).resolve().parent
    hosted_root = evidence_dir.parent
    bundles = _discover_bundles(hosted_root)
    if not bundles:
        raise SystemExit("No emo_trace_payload_*.tar.gz bundles found in hosted repo root.")

    built_at = dt.datetime.now(tz=dt.timezone.utc).strftime("%Y-%m-%d %H:%M:%SZ")

    trace_rows: list[dict[str, Any]] = []
    for bundle_path in bundles:
        summary = _load_trace_summary(bundle_path)
        summary["bundle_name"] = bundle_path.name
        # Prefer the published sidecar digest; hash the file only as a fallback.
        summary["bundle_sha256"] = _read_sha256_sidecar(bundle_path) or _sha256_file(bundle_path)
        trace_rows.append(summary)

    # Newest first; timestamps are compared as strings.
    trace_rows.sort(key=lambda row: str(row.get("ts_utc") or ""), reverse=True)

    verifier_url = f"{STATIC_HOSTED_BASE_URL}/iftrace.py"

    # Build per-trace pages.
    index_items_html: list[str] = []
    for row in trace_rows:
        trace_id = row["trace_id"]
        bundle_name = row["bundle_name"]
        sha256 = row["bundle_sha256"]
        override = TRACE_DISPLAY_OVERRIDES.get(trace_id)

        # Coerce payload values to str once, so that a non-string JSON value
        # renders the same way on the index and on the detail page.
        ts_utc = str(row.get("ts_utc") or "")
        provider = str(row.get("provider") or "")
        model = str(row.get("model") or "")
        user_text = str(row.get("user_text") or "")
        assistant_text = str(row.get("assistant_text") or "")
        if_story = str(row.get("if_story") or "")

        title = override.title if override else _truncate(user_text or trace_id, 80) or trace_id
        trace_id_html = html.escape(trace_id)

        index_items_html.append(
            "<tr>"
            f"<td><a href=\"trace_{trace_id_html}.html\">{html.escape(title)}</a></td>"
            f"<td><code>{trace_id_html}</code></td>"
            f"<td><span class=\"badge\">{html.escape(provider or 'unknown')}</span></td>"
            f"<td><span class=\"badge\">{html.escape(model or 'unknown')}</span></td>"
            f"<td><code>{html.escape(ts_utc)}</code></td>"
            "</tr>"
        )

        # Keep the URLs unescaped and escape them exactly once at each point
        # of use: as attribute values below, and via _escape_pre for the
        # command listing (which would otherwise be escaped twice).
        tar_url = f"{STATIC_HOSTED_BASE_URL}/{bundle_name}"
        sha_sidecar_url = f"{tar_url}.sha256"
        tar_href = html.escape(tar_url)
        sha_sidecar_href = html.escape(sha_sidecar_url)

        verification_cmds = (
            "# Download the evidence bundle\n"
            f"curl -fsSL -o emo.tar.gz '{tar_url}'\n"
            "\n"
            "# Verify transport integrity\n"
            "sha256sum emo.tar.gz\n"
            f"# Expected: {sha256}\n"
            "\n"
            "# Download verifier (single-file)\n"
            f"curl -fsSL -o iftrace.py '{verifier_url}'\n"
            f"python3 iftrace.py verify emo.tar.gz --expected-sha256 {sha256}\n"
        )
        verification_pre = _escape_pre(verification_cmds)

        if override and override.redact_user_text:
            user_text = "[REDACTED]"
        if override and override.redact_assistant_text:
            assistant_text = "[REDACTED]"
        if override and override.redact_if_story:
            if_story = "[REDACTED]"

        child_table = _child_table_html(row.get("ttt_children") or [])

        # Flag the page whenever any part of it was redacted, the narrative
        # included.
        warn_badge = ""
        if override and (
            override.redact_user_text or override.redact_assistant_text or override.redact_if_story
        ):
            warn_badge = '<span class="badge warn">redacted</span>'

        bundle_name_html = html.escape(bundle_name)
        body_html = f"""
<div class="card">
<div class="meta">
<div class="k">Trace ID</div><div><code>{trace_id_html}</code> {warn_badge}</div>
<div class="k">Timestamp (UTC)</div><div><code>{html.escape(ts_utc)}</code></div>
<div class="k">Provider</div><div><code>{html.escape(provider)}</code></div>
<div class="k">Model</div><div><code>{html.escape(model)}</code></div>
<div class="k">Bundle</div><div><a href="{tar_href}">{bundle_name_html}</a> (SHA256: <code>{html.escape(sha256)}</code>)</div>
<div class="k">SHA sidecar</div><div><a href="{sha_sidecar_href}">{bundle_name_html}.sha256</a></div>
</div>
</div>
<div class="card">
<h2 style="margin:0 0 8px;font-size:15px;">Independent verification</h2>
<pre>{verification_pre}</pre>
<div style="color:var(--muted);font-size:13px;">
These pages are static. Verification does not require logging into this server.
</div>
</div>
<div class="card">
<h2 style="margin:0 0 8px;font-size:15px;">User request (captured)</h2>
<pre>{_escape_pre(user_text)}</pre>
<h2 style="margin:12px 0 8px;font-size:15px;">Model output (captured)</h2>
<pre>{_escape_pre(assistant_text)}</pre>
</div>
<div class="card">
<h2 style="margin:0 0 8px;font-size:15px;">TTT child artifacts</h2>
{child_table}
</div>
<div class="card">
<h2 style="margin:0 0 8px;font-size:15px;">IF.STORY (human-readable narrative)</h2>
<div style="color:var(--muted);font-size:13px;margin-bottom:8px;">
IF.STORY is a projection/view; the tarball contains the raw JSONL/JSON artifacts for evidence-grade verification.
</div>
<pre>{_escape_pre(if_story)}</pre>
</div>
<div class="card">
<a href="index.html">← Back to evidence index</a>
</div>
"""
        page = _render_page(f"IF.emotion Evidence Trace — {trace_id}", body_html)
        # NOTE(review): trace_id comes from the bundle payload and is used
        # verbatim in the output file name; confirm it can never contain
        # path separators.
        (evidence_dir / f"trace_{trace_id}.html").write_text(page, encoding="utf-8")

    # Build index page.
    index_body = f"""
<div class="card">
<div style="color:var(--muted);font-size:13px;">Built: <code>{html.escape(built_at)}</code></div>
<p style="margin:10px 0 0;">
This is a public index of trace bundles that can be independently verified via SHA256 + the included <code>iftrace.py</code> verifier.
It exists to close the “verifiability gap” for external reviewers.
</p>
<p style="margin:8px 0 0;">
Key docs:
<a href="{STATIC_HOSTED_BASE_URL}/IF_EMOTION_DEBUGGING_TRACE_WHITEPAPER_v3.3_STYLED.md">Trace protocol whitepaper</a> ·
<a href="{STATIC_HOSTED_BASE_URL}/iftrace.py">Verifier</a> ·
<a href="{STATIC_HOSTED_BASE_URL}/">Static directory listing</a>
</p>
</div>
<div class="card">
<h2 style="margin:0 0 8px;font-size:15px;">Traces</h2>
<table>
<thead>
<tr>
<th>Summary</th>
<th>Trace ID</th>
<th>Provider</th>
<th>Model</th>
<th>Timestamp</th>
</tr>
</thead>
<tbody>
{"".join(index_items_html)}
</tbody>
</table>
</div>
"""
    (evidence_dir / "index.html").write_text(_render_page("IF.emotion Evidence Index", index_body), encoding="utf-8")
    return 0
# Script entry point: run the build and exit with the status main() returns.
if __name__ == "__main__":
    raise SystemExit(main())