diff --git a/AGENTS.md b/AGENTS.md
index 433a8e4..1dbb1dc 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -22,11 +22,52 @@ There is a sync job that mirrors `https://git.infrafabric.io/danny/hosted.git` i
 **Important:** The sync uses `rsync --delete`, so anything not in the mirrored repo would normally be removed. To keep operator-generated review artifacts stable, the sync script now excludes:
 
 - `bibles/`
 - `review/`
+- `iftrace.py` (operator-maintained; don’t overwrite via repo sync)
 
 So **publish operator-generated bibles/review packs under**:
 
 - `/srv/hosted-static/public/bibles/…`
 - `/srv/hosted-static/public/review/…`
 
+## HTML-only sandbox fallback (new; critical for external reviewers)
+
+Some LLM “web fetchers” can load HTML but fail on `.md/.py/.tar.gz`. To keep the IF.TTT “open governance” premise intact for external review:
+
+- Keep the raw assets (`.md`, `.tar.gz`) **and** provide an **HTML view** on the same stable alias surface.
+- Share surface:
+  - Raw pack: `/static/pack/<shareId>.md`
+  - HTML pack view: `/static/pack/<shareId>`
+  - Raw review pack: `/static/review/<shareId>.md` (alt: `/static/review-pack/<shareId>.md`)
+  - HTML review pack view: `/static/review/<shareId>` (alt: `/static/review-pack/<shareId>`)
+  - Raw marketing: `/static/marketing/<shareId>.md`
+  - HTML marketing view: `/static/marketing/<shareId>`
+- Hosted review artifacts (`/static/hosted/review/**`) also have `.html` wrappers generated post-sync.
+
+Implementation notes:
+- Caddy rewrites `/static/*` (HTML view endpoints) to the red-team app (`pct 212`).
+- Hosted `.html` wrappers are generated by `pct 210:/usr/local/bin/hosted_static_build_html_wrappers.py` after each sync.
+
+## Full stack + links (operator reference)
+
+- `/root/docs/19-ifttt-full-stack-and-working-links.md` is the “single page” reference for:
+  - Which apps run where (pct IDs + IPs)
+  - Which URLs are canonical for sharing
+  - Copy/paste-safe example links
+- IF.TTT public overview page (hosted-static): https://infrafabric.io/static/hosted/ifttt/
+
+## IF.TTT paper update review pack (known-good example)
+
+Use this pack when requesting external critique of the IF.TTT paper update (receipt-first chronology + public receipts + triage bundles):
+
+- Landing: `https://infrafabric.io/static/hosted/review/ifttt-paper-update/2025-12-28/`
+- Pack (MD): `https://infrafabric.io/static/hosted/review/ifttt-paper-update/2025-12-28/review-pack.md`
+- Pack (HTML): `https://infrafabric.io/static/hosted/review/ifttt-paper-update/2025-12-28/review-pack.html`
+- Pack (tar.gz): `https://infrafabric.io/static/hosted/review/ifttt-paper-update/2025-12-28/review-pack.tar.gz`
+- Pack hash: `https://infrafabric.io/static/hosted/review/ifttt-paper-update/2025-12-28/review-pack.tar.gz.sha256`
+- Triage selector demo (canonical): `https://infrafabric.io/static/hosted/review/trace-bundles/d70ed99a/index.md`
+- Offline verifier: `https://infrafabric.io/static/hosted/iftrace.py`
+
+Note: some LLM “web fetchers” reject `.tar.gz` with a client-side `415` even when browsers/curl succeed; use the `.html` pack in those environments.
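For reviewers who want to check the pack hash without `iftrace.py`, a minimal sketch using only the Python standard library; the URLs are the ones listed above, and it assumes the `.sha256` file uses the conventional `<hex>  <filename>` layout:

```python
import hashlib
import urllib.request

BASE = "https://infrafabric.io/static/hosted/review/ifttt-paper-update/2025-12-28"

def fetch(url: str) -> bytes:
    with urllib.request.urlopen(url) as resp:
        return resp.read()

# Hash the tarball locally and compare against the published fingerprint.
pack = fetch(f"{BASE}/review-pack.tar.gz")
expected = fetch(f"{BASE}/review-pack.tar.gz.sha256").decode().split()[0]
actual = hashlib.sha256(pack).hexdigest()
print("OK" if actual == expected else f"MISMATCH: {actual} != {expected}")
```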
+
 ## Week review packs (v1.8)
 
 Week v1.8 packs are published here:
 
diff --git a/site/red-team-shadow-dossiers/server/server.mjs b/site/red-team-shadow-dossiers/server/server.mjs
index 3b17893..b8642d0 100644
--- a/site/red-team-shadow-dossiers/server/server.mjs
+++ b/site/red-team-shadow-dossiers/server/server.mjs
@@ -8,6 +8,24 @@ import MarkdownIt from "markdown-it";
 import express from "express";
 import multer from "multer";
 
+/*
+Public, no-login receipt surface (IF.TTT)
+----------------------------------------
+This server exposes Shadow Dossiers and their "receipt" artifacts via two parallel
+representations:
+
+- Raw (download-friendly): `*.md` (and tarballs elsewhere)
+- HTML views: same path without the `.md` suffix
+
+Rationale: some external review environments (including certain LLM "web fetchers")
+reliably load `text/html` but may reject "downloadable" assets like `.md/.py/.tar.gz`.
+Keeping both surfaces makes the governance/receipt premise reviewable by humans *and*
+restricted sandboxes.
+
+Deployment detail: the stable public aliases live under `/static/*` on the public
+domain and are reverse-proxied here (see operator docs: `/root/docs/17-ifttt-public-receipt-surface.md`).
+*/
+
 const __filename = url.fileURLToPath(import.meta.url);
 const __dirname = path.dirname(__filename);
 
@@ -145,6 +163,7 @@ function renderTraceHeaderHtml({ verification, job }) {
   const createdAt = job?.createdAt ? String(job.createdAt) : "";
   const traceId = String(job?.id || "");
   const style = String(job?.style || "");
+  const ttt = job?._ttt_trace_receipt || null;
 
   const checks = verification?.checks || {};
   const outputOk = checks.outputOk === true;
@@ -154,6 +173,11 @@
   const outputLabel = outputOk ? "PASS" : "FAIL";
   const sourceLabel = sourceOk === true ? "PASS" : sourceOk === false ? "FAIL" : "UNKNOWN";
 
+  const quantumReady = Boolean(ttt && ttt.quantum_ready === true);
+  const pqAlgo = ttt && ttt.pq_algo ? String(ttt.pq_algo) : "";
+  const pqStatus = ttt && ttt.pq_status ? String(ttt.pq_status) : "";
+  const pqLabel = quantumReady ? `READY${pqAlgo ? ` (${pqAlgo})` : ""}` : ttt ? "ABSENT" : "UNKNOWN";
+
   const safeCreatedAt = createdAt ? escapeHtml(createdAt) : "";
   const safeTraceId = traceId ? escapeHtml(traceId) : "";
   const safeStyle = style ? escapeHtml(style) : "";
@@ -171,6 +195,7 @@ function renderTraceHeaderHtml({ verification, job }) {
     `<div>`,
     `<div>  • Output hash check: ${escapeHtml(outputLabel)}</div>`,
     `<div>  • Source hash check: ${escapeHtml(sourceLabel)}</div>`,
+    `<div>  • Quantum-ready receipt: ${escapeHtml(pqLabel)}${pqStatus ? ` (${escapeHtml(pqStatus)})` : ""}</div>`,
     `<div>  • Quality warnings: ${warningsPresent ? "present" : "none recorded"}</div>`,
     `</div>
`, ``, @@ -258,11 +283,30 @@ function renderTraceMarkdown({ shareId, job, publicBaseUrl, staticPublicBaseUrl const createdAt = job?.createdAt ? String(job.createdAt) : ""; const status = job?.status ? String(job.status) : ""; const warningsPresent = Boolean(job?.warnings && String(job.warnings).trim()); + const ttt = job?._ttt_trace_receipt || job?.tttTraceReceipt || null; + const tttId = ttt && ttt.id ? String(ttt.id) : ""; + const tttHash = ttt && ttt.content_hash ? String(ttt.content_hash) : ""; + const pqReady = Boolean(ttt && ttt.quantum_ready === true); + const pqAlgo = ttt && ttt.pq_algo ? String(ttt.pq_algo) : ""; + const pqStatus = ttt && ttt.pq_status ? String(ttt.pq_status) : ""; + + const traceId = String(job?.id || "").trim(); + const tracePrefixRaw = traceId ? traceId.split("-")[0] : ""; + const tracePrefix = /^[0-9a-f]{8}$/i.test(tracePrefixRaw) ? tracePrefixRaw.toLowerCase() : ""; + const triageSelectorUrl = tracePrefix + ? `${primaryBase}/static/hosted/review/trace-bundles/${encodeURIComponent(tracePrefix)}/index.html` + : ""; + const triageSelectorUrlRaw = tracePrefix + ? `${primaryBase}/static/hosted/review/trace-bundles/${encodeURIComponent(tracePrefix)}/index.md` + : ""; const dossierUrl = `${primaryBase}/static/dossier/${encodeURIComponent(shareId)}`; const traceUrl = `${primaryBase}/static/trace/${encodeURIComponent(shareId)}`; const downloadUrl = `${primaryBase}/static/dossier/${encodeURIComponent(shareId)}/download`; const packUrl = `${primaryBase}/static/pack/${encodeURIComponent(shareId)}.md`; + const packHtmlUrl = `${primaryBase}/static/pack/${encodeURIComponent(shareId)}`; + const reviewHtmlUrl = `${primaryBase}/static/review/${encodeURIComponent(shareId)}`; + const marketingHtmlUrl = `${primaryBase}/static/marketing/${encodeURIComponent(shareId)}`; const sourceUrl = job?.sourceSha256 ? `${primaryBase}/static/source/${job.sourceSha256}${path.extname(job.sourcePath || "").toLowerCase()}` : ""; @@ -272,6 +316,9 @@ function renderTraceMarkdown({ shareId, job, publicBaseUrl, staticPublicBaseUrl const directTraceUrl = `${directBase}/r/${encodeURIComponent(shareId)}/trace`; const directDownloadUrl = `${directBase}/r/${encodeURIComponent(shareId)}/download`; const directPackUrl = `${directBase}/r/${encodeURIComponent(shareId)}/pack.md`; + const directPackHtmlUrl = `${directBase}/r/${encodeURIComponent(shareId)}/pack`; + const directReviewHtmlUrl = `${directBase}/r/${encodeURIComponent(shareId)}/review-pack`; + const directMarketingHtmlUrl = `${directBase}/r/${encodeURIComponent(shareId)}/marketing`; const lastResortBase = normalizeBaseUrl(publicBaseUrl); const lastResortDossierUrl = lastResortBase ? `${lastResortBase}/r/${encodeURIComponent(shareId)}` : ""; @@ -289,6 +336,9 @@ function renderTraceMarkdown({ shareId, job, publicBaseUrl, staticPublicBaseUrl "- You can independently verify the downloaded dossier Markdown by hashing it and comparing to `Output sha256` below.", "- You can independently verify the hosted source file (if present) by hashing it and comparing to `Source sha256` below.", "- This page binds those two fingerprints together as a single public evidence record.", + pqReady + ? 
`- This trace also has a **Quantum-ready** signed receipt record (${pqAlgo || "PQ"}; ${pqStatus || "hybrid"}).` + : "- This trace does not claim any post-quantum proof unless the header says QUANTUM READY.", "", "## What this trace does not prove", "", @@ -301,6 +351,11 @@ function renderTraceMarkdown({ shareId, job, publicBaseUrl, staticPublicBaseUrl `- Dossier (rendered): ${dossierUrl}`, `- Dossier (download Markdown): ${downloadUrl}`, `- Single-file pack (review + dossier + trace): ${packUrl}`, + `- Pack (HTML view; for restrictive sandboxes): ${packHtmlUrl}`, + `- Review pack (HTML view; links-only): ${reviewHtmlUrl}`, + `- Marketing excerpt (HTML view): ${marketingHtmlUrl}`, + triageSelectorUrl ? `- Offline bundles (triage selector): ${triageSelectorUrl}` : null, + triageSelectorUrlRaw ? `- Offline bundles (raw Markdown): ${triageSelectorUrlRaw}` : null, sourceUrl ? `- Source (PDF): ${sourceUrl}` : null, `- This trace page: ${traceUrl}`, mirrorBase ? "" : null, @@ -308,6 +363,7 @@ function renderTraceMarkdown({ shareId, job, publicBaseUrl, staticPublicBaseUrl mirrorBase ? "" : null, mirrorBase ? `- Dossier: ${mirrorBase}/static/dossier/${encodeURIComponent(shareId)}` : null, mirrorBase ? `- Pack: ${mirrorBase}/static/pack/${encodeURIComponent(shareId)}.md` : null, + mirrorBase ? `- Pack (HTML view): ${mirrorBase}/static/pack/${encodeURIComponent(shareId)}` : null, mirrorBase ? `- Trace: ${mirrorBase}/static/trace/${encodeURIComponent(shareId)}` : null, mirrorBase && sourceUrl ? `- Source: ${mirrorBase}/static/source/${job.sourceSha256}${path.extname(job.sourcePath || "").toLowerCase()}` : null, "", @@ -316,6 +372,9 @@ function renderTraceMarkdown({ shareId, job, publicBaseUrl, staticPublicBaseUrl `- Dossier: ${directDossierUrl}`, `- Download: ${directDownloadUrl}`, `- Pack: ${directPackUrl}`, + `- Pack (HTML view): ${directPackHtmlUrl}`, + `- Review pack (HTML view): ${directReviewHtmlUrl}`, + `- Marketing excerpt (HTML view): ${directMarketingHtmlUrl}`, `- Trace: ${directTraceUrl}`, lastResortBase && lastResortBase !== directBase ? "" : null, lastResortBase && lastResortBase !== directBase ? "## Last resort (alternate host)" : null, @@ -338,6 +397,9 @@ function renderTraceMarkdown({ shareId, job, publicBaseUrl, staticPublicBaseUrl `- Source sha256: \`${job.sourceSha256 || ""}\``, `- Style: \`${job.style || ""}\``, `- Source bytes: \`${String(job.sourceBytes ?? "")}\``, + tttId ? `- Signed trace receipt ID: \`${tttId}\`` : null, + tttHash ? `- Signed trace receipt hash: \`${tttHash}\`` : null, + pqStatus ? `- PQ status: \`${pqStatus}\`` : null, "", "## How to verify (locally)", "", @@ -705,6 +767,140 @@ function staticPublicBaseUrlForRequest(req, fallbackPublicBaseUrl) { return publicBaseFromRequest(req, fallbackPublicBaseUrl); } +function tttRegistryBaseUrl() { + const explicit = String(process.env.TTT_REGISTRY_BASE_URL || "").trim(); + return explicit ? 
explicit.replace(/\/+$/g, "") : ""; +} + +function tttRegistryApiToken() { + return String(process.env.TTT_API_TOKEN || "").trim(); +} + +async function fetchJson(url, { method = "GET", headers, body, timeoutMs = 4500 } = {}) { + const controller = new AbortController(); + const t = setTimeout(() => controller.abort(), timeoutMs); + try { + const resp = await fetch(url, { method, headers, body, signal: controller.signal }); + const text = await resp.text(); + let data = null; + try { + data = JSON.parse(text); + } catch { + data = { raw: text }; + } + return { ok: resp.ok, status: resp.status, data }; + } finally { + clearTimeout(t); + } +} + +function traceReceiptRecordId(traceId) { + const id = String(traceId || "").trim(); + if (!looksLikeUuid(id)) return ""; + return `if://trace/${id}/v1`; +} + +function buildTraceReceiptEvidence({ job, shareId, staticPublicBaseUrl }) { + const base = normalizeBaseUrl(staticPublicBaseUrl || process.env.STATIC_SOURCE_PUBLIC_BASE_URL || "https://infrafabric.io"); + + const sid = String(shareId || "").trim(); + const traceId = String(job?.id || "").trim(); + + const sourceExt = String(job?.sourcePath ? path.extname(job.sourcePath) : "").toLowerCase() || ".pdf"; + const sourceUrl = job?.sourceSha256 ? `${base}/static/source/${job.sourceSha256}${sourceExt}` : ""; + + return { + share_id: sid, + trace_id: traceId, + created_at: job?.createdAt || "", + style: job?.style || "", + source_sha256: job?.sourceSha256 || "", + output_sha256: job?.outputSha256 || "", + urls: { + pack_md: `${base}/static/pack/${encodeURIComponent(sid)}.md`, + pack_html: `${base}/static/pack/${encodeURIComponent(sid)}`, + review_html: `${base}/static/review/${encodeURIComponent(sid)}`, + marketing_html: `${base}/static/marketing/${encodeURIComponent(sid)}`, + dossier_html: `${base}/static/dossier/${encodeURIComponent(sid)}`, + dossier_md: `${base}/static/dossier/${encodeURIComponent(sid)}/download`, + trace_html: `${base}/static/trace/${encodeURIComponent(sid)}`, + source_pdf: sourceUrl, + }, + }; +} + +function summarizeTttRecord(record) { + const rec = record && typeof record === "object" ? record : null; + if (!rec) return null; + const pqStatus = String(rec.pq_status || "").trim(); + const pqAlgo = String(rec.pq_algo || "").trim(); + const signaturePqPresent = Boolean(rec.signature_pq); + return { + id: String(rec.id || "").trim(), + content_hash: String(rec.content_hash || "").trim(), + signer: String(rec.signer || "").trim(), + pq_status: pqStatus, + pq_algo: pqAlgo, + pq_signature_present: signaturePqPresent, + quantum_ready: signaturePqPresent && pqStatus !== "classical-only", + }; +} + +async function fetchTttRecordById(recordId) { + const base = tttRegistryBaseUrl(); + if (!base || !recordId) return { ok: false, status: 0, record: null }; + const u = `${base}/v1/citation?id=${encodeURIComponent(recordId)}`; + try { + const { ok, status, data } = await fetchJson(u, { method: "GET" }); + const verified = Boolean(data && data.verified === true); + const record = verified && data && data.record ? 
data.record : null; + return { ok: ok && verified, status, record }; + } catch { + return { ok: false, status: 0, record: null }; + } +} + +async function upsertTttTraceReceipt({ job, shareId, staticPublicBaseUrl }) { + const base = tttRegistryBaseUrl(); + if (!base) return { ok: false, status: 0, record: null, mode: "disabled" }; + + const rid = traceReceiptRecordId(job?.id); + if (!rid) return { ok: false, status: 0, record: null, mode: "invalid_trace_id" }; + + // Best effort: read-only GET first (no token required). + const existing = await fetchTttRecordById(rid); + if (existing.ok && existing.record) return { ok: true, status: 200, record: existing.record, mode: "found" }; + + const token = tttRegistryApiToken(); + if (!token) return { ok: false, status: 0, record: null, mode: "no_token" }; + + const evidence = buildTraceReceiptEvidence({ job, shareId, staticPublicBaseUrl }); + const claim = `IF.TTT trace receipt for shareId=${shareId} trace_id=${job.id}`; + const payload = { + id: rid, + claim, + evidence, + timestamp: job?.createdAt || undefined, + }; + + const url = `${base}/v1/citation`; + try { + const { ok, status, data } = await fetchJson(url, { + method: "POST", + headers: { + Authorization: `Bearer ${token}`, + "Content-Type": "application/json; charset=utf-8", + }, + body: JSON.stringify(payload), + timeoutMs: 6500, + }); + if (!ok || !data || !data.record) return { ok: false, status, record: null, mode: "create_failed" }; + return { ok: true, status, record: data.record, mode: "created" }; + } catch { + return { ok: false, status: 0, record: null, mode: "create_failed" }; + } +} + function looksLikeUuid(value) { return /^[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i.test(String(value || "")); } @@ -1046,11 +1242,27 @@ function main() { const job = readJob(jobsDir, share.jobId); if (!job?.outputPath) return res.status(404).type("text/plain").send("Not found"); - const verification = await computeVerificationStatus({ job, projectRoot, outputsDir, uploadsDir }); - const jobForRender = { ...job, _verification: verification }; - const publicBaseUrl = publicBaseFromRequest(req, "red-team.infrafabric.io"); const staticPublicBaseUrl = staticPublicBaseUrlForRequest(req, publicBaseUrl); + + // Best-effort: attach a registry-signed trace receipt record so we can render + // black/white “QUANTUM READY” without over-claiming. If the registry is + // unreachable, we still render the classic hash receipt. + let tttTraceReceipt = job.tttTraceReceipt || null; + if (!tttTraceReceipt || !tttTraceReceipt.id || !tttTraceReceipt.content_hash) { + const ttt = await upsertTttTraceReceipt({ job, shareId, staticPublicBaseUrl }); + if (ttt.ok && ttt.record) { + tttTraceReceipt = summarizeTttRecord(ttt.record); + if (tttTraceReceipt) { + job.tttTraceReceipt = tttTraceReceipt; + writeJob(jobsDir, job); + } + } + } + + const verification = await computeVerificationStatus({ job, projectRoot, outputsDir, uploadsDir }); + const jobForRender = { ...job, _verification: verification, _ttt_trace_receipt: tttTraceReceipt }; + const md = renderTraceMarkdown({ shareId, job: jobForRender, publicBaseUrl, staticPublicBaseUrl }); const html = markdown.render(md); @@ -1059,7 +1271,10 @@ function main() { `Back to dossier`, `Download Markdown`, job.sourcePath ? 
`Download source` : "", + `Review pack (HTML)`, `Review pack (MD)`, + `Single-file pack (HTML)`, + `Single-file pack (MD)`, ] .filter(Boolean) .join(" · "); @@ -1083,6 +1298,9 @@ function main() { res.redirect(302, staticFile.urlPath); }); + // NOTE: These routes intentionally come in pairs: + // - `*.md` is the raw, download-friendly artifact + // - same path without `.md` is the HTML view (for HTML-only sandboxes) app.get("/r/:shareId/review-pack.md", (req, res) => { const shareId = String(req.params.shareId || "").trim(); if (!shareId) return res.status(404).type("text/plain").send("Not found"); @@ -1106,6 +1324,33 @@ function main() { .send(md); }); + app.get("/r/:shareId/review-pack", (req, res) => { + const shareId = String(req.params.shareId || "").trim(); + if (!shareId) return res.status(404).type("text/plain").send("Not found"); + const share = readShare(sharesDir, shareId); + if (!share?.jobId || !looksLikeUuid(share.jobId)) return res.status(404).type("text/plain").send("Not found"); + const job = readJob(jobsDir, share.jobId); + if (!job?.outputPath) return res.status(404).type("text/plain").send("Not found"); + + const staticSource = ensureStaticSourceFile({ job, uploadsDir, staticSourceDir, projectRoot }); + const externalReviewBaseUrl = String(process.env.EXTERNAL_REVIEW_BASE_URL || "https://emo-social.infrafabric.io/external-review.html"); + const externalReviewUrl = buildExternalReviewUrl(externalReviewBaseUrl, share.reviewSheetId); + const publicBaseUrl = publicBaseFromRequest(req, "red-team.infrafabric.io"); + const staticPublicBaseUrl = staticPublicBaseUrlForRequest(req, publicBaseUrl); + const staticSourceUrl = staticSource ? `${staticPublicBaseUrl}${staticSource.urlPath}` : ""; + + const md = renderReviewPackMarkdown({ shareId, job, publicBaseUrl, externalReviewUrl, staticSourceUrl, staticPublicBaseUrl }); + const html = markdown.render(md); + const topLinks = [ + `Back to dossier`, + `Raw Markdown`, + `Single-file pack (HTML)`, + ] + .filter(Boolean) + .join(" · "); + res.status(200).type("text/html; charset=utf-8").send(renderMarkdownPage({ title: "Review pack", html, topLinksHtml: topLinks })); + }); + app.get("/r/:shareId/pack.md", (req, res) => { const shareId = String(req.params.shareId || "").trim(); if (!shareId) return res.status(404).type("text/plain").send("Not found"); @@ -1139,6 +1384,46 @@ function main() { res.status(200).type("text/markdown; charset=utf-8").send(md); }); + app.get("/r/:shareId/pack", (req, res) => { + const shareId = String(req.params.shareId || "").trim(); + if (!shareId) return res.status(404).type("text/plain").send("Not found"); + const share = readShare(sharesDir, shareId); + if (!share?.jobId || !looksLikeUuid(share.jobId)) return res.status(404).type("text/plain").send("Not found"); + const job = readJob(jobsDir, share.jobId); + if (!job?.outputPath) return res.status(404).type("text/plain").send("Not found"); + + const abs = path.resolve(projectRoot, job.outputPath); + if (!abs.startsWith(outputsDir + path.sep)) return res.status(400).type("text/plain").send("Bad path"); + if (!fs.existsSync(abs)) return res.status(404).type("text/plain").send("Not found"); + const dossierMarkdown = fs.readFileSync(abs, "utf8"); + + const staticSource = ensureStaticSourceFile({ job, uploadsDir, staticSourceDir, projectRoot }); + const externalReviewBaseUrl = String(process.env.EXTERNAL_REVIEW_BASE_URL || "https://emo-social.infrafabric.io/external-review.html"); + const externalReviewUrl = buildExternalReviewUrl(externalReviewBaseUrl, 
share.reviewSheetId); + const publicBaseUrl = publicBaseFromRequest(req, "red-team.infrafabric.io"); + const staticPublicBaseUrl = staticPublicBaseUrlForRequest(req, publicBaseUrl); + const staticSourceUrl = staticSource ? `${staticPublicBaseUrl}${staticSource.urlPath}` : ""; + + const md = renderSingleFilePackMarkdown({ + shareId, + job, + publicBaseUrl, + externalReviewUrl, + staticSourceUrl, + staticPublicBaseUrl, + dossierMarkdown, + }); + const html = markdown.render(md); + const topLinks = [ + `Back to dossier`, + `Raw Markdown`, + `Review pack (HTML)`, + ] + .filter(Boolean) + .join(" · "); + res.status(200).type("text/html; charset=utf-8").send(renderMarkdownPage({ title: "Single-file pack", html, topLinksHtml: topLinks })); + }); + app.get("/r/:shareId/marketing.md", (req, res) => { const shareId = String(req.params.shareId || "").trim(); if (!shareId) return res.status(404).type("text/plain").send("Not found"); @@ -1169,6 +1454,43 @@ function main() { res.status(200).type("text/markdown; charset=utf-8").send(md); }); + app.get("/r/:shareId/marketing", (req, res) => { + const shareId = String(req.params.shareId || "").trim(); + if (!shareId) return res.status(404).type("text/plain").send("Not found"); + const share = readShare(sharesDir, shareId); + if (!share?.jobId || !looksLikeUuid(share.jobId)) return res.status(404).type("text/plain").send("Not found"); + const job = readJob(jobsDir, share.jobId); + if (!job?.outputPath) return res.status(404).type("text/plain").send("Not found"); + + const abs = path.resolve(projectRoot, job.outputPath); + if (!abs.startsWith(outputsDir + path.sep)) return res.status(400).type("text/plain").send("Bad path"); + if (!fs.existsSync(abs)) return res.status(404).type("text/plain").send("Not found"); + const dossierMarkdown = fs.readFileSync(abs, "utf8"); + + const staticSource = ensureStaticSourceFile({ job, uploadsDir, staticSourceDir, projectRoot }); + const publicBaseUrl = publicBaseFromRequest(req, "red-team.infrafabric.io"); + const staticPublicBaseUrl = staticPublicBaseUrlForRequest(req, publicBaseUrl); + const staticSourceUrl = staticSource ? `${staticPublicBaseUrl}${staticSource.urlPath}` : ""; + + const md = renderMarketingPackMarkdown({ + shareId, + job, + publicBaseUrl, + staticPublicBaseUrl, + staticSourceUrl, + dossierMarkdown, + }); + const html = markdown.render(md); + const topLinks = [ + `Back to dossier`, + `Raw Markdown`, + `Single-file pack (HTML)`, + ] + .filter(Boolean) + .join(" · "); + res.status(200).type("text/html; charset=utf-8").send(renderMarkdownPage({ title: "Marketing excerpt", html, topLinksHtml: topLinks })); + }); + app.get("/r/:shareId", (req, res) => { const shareId = String(req.params.shareId || "").trim(); if (!shareId) return res.status(404).type("text/plain").send("Not found"); @@ -1189,7 +1511,11 @@ function main() { `Download Markdown`, job.sourcePath ? `Download source` : "", `IF.TTT trace`, + `Review pack (HTML)`, `Review pack (MD)`, + `Single-file pack (HTML)`, + `Single-file pack (MD)`, + `Marketing excerpt (HTML)`, `Marketing excerpt (MD)`, externalReviewUrl ? `Feedback intake (login)` : "", ] @@ -1286,6 +1612,20 @@ function main() { job.warnings = warnings ? warnings.trim() : ""; job.outputSha256 = await sha256File(absOutputPath); job.status = job.warnings ? "done_with_warnings" : "done"; + + // Best-effort: create a registry-signed trace receipt record (PQ-capable). + // This must never block publishing; failures degrade gracefully. 
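For reviewers exercising the registry from outside Node, the same round-trip (read-only GET first, token-gated POST only when the record is missing) looks like this in Python. The `/v1/citation` path, the `verified`/`record` response fields, the env var names, and the bearer header mirror the code above; the helper names and timeout values are illustrative assumptions:

```python
import json
import os
import urllib.parse
import urllib.request

REGISTRY = os.environ.get("TTT_REGISTRY_BASE_URL", "").rstrip("/")
TOKEN = os.environ.get("TTT_API_TOKEN", "")

def get_receipt(record_id: str) -> dict | None:
    # Read-only lookup; no token required (mirrors fetchTttRecordById).
    url = f"{REGISTRY}/v1/citation?id={urllib.parse.quote(record_id, safe='')}"
    with urllib.request.urlopen(url, timeout=5) as resp:
        data = json.load(resp)
    return data.get("record") if data.get("verified") is True else None

def create_receipt(record_id: str, claim: str, evidence: dict) -> dict | None:
    # Creation is token-gated (mirrors upsertTttTraceReceipt).
    req = urllib.request.Request(
        f"{REGISTRY}/v1/citation",
        data=json.dumps({"id": record_id, "claim": claim, "evidence": evidence}).encode(),
        method="POST",
        headers={
            "Authorization": f"Bearer {TOKEN}",
            "Content-Type": "application/json; charset=utf-8",
        },
    )
    with urllib.request.urlopen(req, timeout=7) as resp:
        return json.load(resp).get("record")
```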
+ try { + const staticPublicBaseUrl = normalizeBaseUrl(process.env.STATIC_SOURCE_PUBLIC_BASE_URL || "https://infrafabric.io"); + const ttt = await upsertTttTraceReceipt({ job, shareId, staticPublicBaseUrl }); + if (ttt.ok && ttt.record) { + const summary = summarizeTttRecord(ttt.record); + if (summary) job.tttTraceReceipt = summary; + } + } catch { + // ignore + } + writeJob(jobsDir, job); } catch (e) { job.status = "error"; diff --git a/src/revoice/generate.py b/src/revoice/generate.py index 58d538d..bd6b3e4 100644 --- a/src/revoice/generate.py +++ b/src/revoice/generate.py @@ -287,6 +287,42 @@ def _looks_like_navigation_heavy_source(text: str) -> bool: return marker_hits >= 6 +def _looks_like_cover_subtitle_noise(value: str) -> bool: + """ + Heuristic: cover subtitles should be "title-ish" (short, headline-like), + not body sentences, author blocks, or explanatory prose. + """ + s = " ".join((value or "").split()).strip() + if not s: + return True + + # Body fragments often start mid-sentence (lowercase). + if re.match(r"^[a-z]", s): + return True + + # Cover subtitles should be short; long multi-clause prose is usually body copy. + if len(s.split()) > 18: + return True + + # Long prose ending with a period is rarely a subtitle for these sources. + if s.endswith(".") and len(s) > 60: + return True + + low = s.lower() + if "from left to right" in low: + return True + + # Author/credential blocks (common in analyst PDFs) aren't useful as subtitles. + if re.search(r"\b(cissp|ccsk|phd|research director|business value manager)\b", low): + return True + + # Many commas in a long line suggests author list / affiliations. + if s.count(",") >= 3 and len(s) > 80: + return True + + return False + + def _extract_urls(text: str) -> list[str]: urls: list[str] = [] for match in _URL_RE.finditer(text): @@ -627,10 +663,59 @@ def _parse_title_block(lines: list[str]) -> tuple[str, int]: while i < len(lines) and not lines[i].strip(): i += 1 title_lines: list[str] = [] + # Title blocks should be short; OCR/PDF extraction sometimes concatenates body text into the "title". + # Heuristic: keep up to a few short lines and stop before body-like lines (long sentences, URLs, etc.). + max_title_lines = 3 + max_title_words = 14 + max_title_chars = 110 + max_total_words = 18 + max_total_chars = 120 + total_words = 0 + total_chars = 0 while i < len(lines) and lines[i].strip(): stripped = lines[i].strip() - if stripped.lower() != "snyk": - title_lines.append(stripped) + lower = stripped.lower() + if lower == "snyk": + i += 1 + continue + # Skip common page-header noise (e.g., "… | Datasheet 1"). + if "|" in stripped and "datasheet" in lower: + i += 1 + continue + # If the very first non-empty line is already "body-like", synthesize a short title + # from it but keep the full line in the body (do not consume it). + word_count = len(stripped.split()) + looks_body_like = ( + len(stripped) > max_title_chars + or word_count > max_title_words + or bool(re.search(r"\\s{3,}", stripped)) + or "http://" in lower + or "https://" in lower + or (stripped.endswith(".") and word_count > 8) + ) + if not title_lines and looks_body_like: + title_lines.append(_compact_title(stripped, max_chars=72)) + break + # Stop title capture when we hit body-like lines. 
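`_compact_title` is called throughout the v2.0 paths but is not defined in this diff; a minimal sketch of the assumed behavior (collapse whitespace, then truncate on a word boundary under `max_chars`), for readers following along:

```python
def _compact_title(value: str, max_chars: int = 72) -> str:
    # Hypothetical stand-in: the real helper lives outside this diff.
    s = " ".join((value or "").split()).strip()
    if len(s) <= max_chars:
        return s
    cut = s[: max_chars - 1].rsplit(" ", 1)[0].rstrip(" ,;:")
    return f"{cut}…"
```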
+ if title_lines: + if "http://" in lower or "https://" in lower: + break + if len(stripped) > max_title_chars: + break + if word_count > max_title_words: + break + if stripped.endswith(".") and word_count > 8: + break + # Global caps: don't let multiple short lines turn into a paragraph-sized title. + if total_words + word_count > max_total_words: + break + if total_chars + len(stripped) > max_total_chars: + break + if len(title_lines) >= max_title_lines: + break + title_lines.append(stripped) + total_words += len(stripped.split()) + total_chars += len(stripped) i += 1 while i < len(lines) and not lines[i].strip(): i += 1 @@ -1547,11 +1632,13 @@ def _inferred_mermaid(title: str, *, ctx: _RenderContext) -> str | None: ) if title_upper.startswith("APPENDIX 1") or "ARCHITECTURE" in title_upper: + is_llm_context = any(k in title_upper for k in ["LLM", "MODEL", "RAG", "PROMPT"]) return ctx.pick_unique( kind="diagram:architecture", key=title, variants=[ - """flowchart TD + ( + """flowchart TD A["User"] --> B["App"] B --> C["LLM"] C --> D["Tools"] @@ -1559,6 +1646,16 @@ def _inferred_mermaid(title: str, *, ctx: _RenderContext) -> str | None: D --> F["External systems"] E --> C """ + if is_llm_context + else """flowchart TD + A["Actor"] --> B["Workflow / system"] + B --> C["Policy decision (rules)"] + C --> D["Gate: enforce / block"] + D --> E["Evidence signals (logs)"] + E --> F["Audit / review cycle"] + F --> C +""" + ) ], used=ctx.used_diagrams, ) @@ -2035,7 +2132,7 @@ def _render_dave_factor_callout(section: _SourceSection, *, ctx: _RenderContext) ctx=ctx, key=section.title, ) - if "REQUEST EVIDENCE" in title_upper or _has(excerpt, "access request", "screenshot"): + if "REQUEST EVIDENCE" in title_upper or _has(excerpt, "screenshot", "attestation"): return _daveify_callout_reframe( "\n".join( [ @@ -2097,8 +2194,15 @@ def _render_dave_factor_callout(section: _SourceSection, *, ctx: _RenderContext) ) if ctx.locale.lower().startswith("fr"): - anchors = _extract_numeric_anchors(section.body, limit=2) - anchor_hint = f" (repères : {', '.join(anchors)})" if anchors else "" + anchors = _extract_numeric_anchors(section.body, limit=4) + anchor = "" + for candidate in anchors: + # Skip citation years in callouts; they read like hallucinated trivia. + if re.fullmatch(r"20\d{2}", candidate): + continue + anchor = candidate + break + anchor_hint = f" ({anchor})" if anchor else "" variants = [ "\n".join( [ @@ -2151,8 +2255,15 @@ def _render_dave_factor_callout(section: _SourceSection, *, ctx: _RenderContext) if not section.body.strip(): return None - anchors = _extract_numeric_anchors(excerpt, limit=2) - anchor_hint = f" (anchors: {', '.join(anchors)})" if anchors else "" + anchors = _extract_numeric_anchors(excerpt, limit=4) + anchor = "" + for candidate in anchors: + # Skip citation years in callouts; they read like hallucinated trivia. 
+ if re.fullmatch(r"20\d{2}", candidate): + continue + anchor = candidate + break + anchor_hint = f" ({anchor})" if anchor else "" variants = [ "\n".join( [ @@ -2194,8 +2305,14 @@ def _render_punchline_closer(section: _SourceSection, *, ctx: _RenderContext) -> if not section.body.strip(): return None - anchors = _extract_numeric_anchors(f"{section.why_it_matters or ''}\n{section.body}".strip(), limit=2) - anchor = anchors[0] if anchors else "" + anchors = _extract_numeric_anchors(f"{section.why_it_matters or ''}\n{section.body}".strip(), limit=4) + anchor = "" + for candidate in anchors: + # Avoid anchoring punchlines to random citation years unless the year is actually part of the section title. + if re.fullmatch(r"20\d{2}", candidate) and candidate not in section.title: + continue + anchor = candidate + break anchor_hint = f" ({anchor})" if anchor else "" if ctx.locale.lower().startswith("fr"): @@ -2321,12 +2438,23 @@ def _render_section(section: _SourceSection, *, ctx: _RenderContext) -> str: ] ) elif title_upper.startswith("APPENDIX 1") or "ARCHITECTURE" in title_upper: - paragraphs.extend( - [ - "Architecture diagrams are where optimism goes to be audited.", - "If we align on boundaries (model, tools, data, users), we can stop pretending that \"the model\" is a single component with a single risk posture.", + if ctx.voice == "v2.0": + variants = [ + "Architecture diagrams are where optimism meets the enforcement boundary (and quietly loses).", + "Architecture diagrams are forwardable; boundaries are enforceable. Dave prefers the version you can screenshot.", + "Architecture diagrams are the happy path. The first exception request is the real design review.", + "Architecture diagrams define components; governance defines who can bypass them. Only one survives audit week.", + "Architecture diagrams are the part everyone agrees on, until we name what blocks and who owns the exception path.", ] - ) + key = f"{section.title}:{_sha256_text(section.body)[:8]}" + paragraphs.append(ctx.pick_unique(kind="paragraph:architecture", key=key, variants=variants, used=ctx.used_paragraphs)) + else: + paragraphs.extend( + [ + "Architecture diagrams are where optimism goes to be audited.", + "If we align on boundaries (model, tools, data, users), we can stop pretending that \"the model\" is a single component with a single risk posture.", + ] + ) elif title_upper == "PROJECT SPONSORS": paragraphs.extend( [ @@ -2363,12 +2491,23 @@ def _render_section(section: _SourceSection, *, ctx: _RenderContext) -> str: ] ) elif "SECURITY TEAM" in title_upper or "SECURITY REVIEW" in title_upper: - paragraphs.extend( - [ - "Security team efficiency is a legitimate goal, especially when review queues become the organizational truth serum.", - 'The risk is that throughput improvements are claimed without defining what "review complete" means or what evidence proves it.', + if ctx.voice == "v2.0": + variants = [ + 'Security team efficiency is a legitimate goal, especially when queues become the organization’s truth serum. The risk is claiming throughput without defining what “review complete” means or what evidence proves it.', + 'Faster reviews are defensible; unmeasured reviews are theater. Define “complete,” define the evidence, and make drift visible before the next audit season.', + 'If the security team is the bottleneck, speed matters. If speed is the metric, definitions matter: what counts as reviewed, and what signal proves it stayed reviewed?', + 'Throughput improvements only count if “done” is defined. 
Otherwise we are measuring calendar velocity and calling it assurance.', + 'Reducing review time is fine. Let’s just avoid the classic move: declare success, then argue about the definition of “review” when incidents arrive.', ] - ) + key = f"{section.title}:{_sha256_text(section.body)[:8]}" + paragraphs.append(ctx.pick_unique(kind="paragraph:sec_team", key=key, variants=variants, used=ctx.used_paragraphs)) + else: + paragraphs.extend( + [ + "Security team efficiency is a legitimate goal, especially when review queues become the organizational truth serum.", + 'The risk is that throughput improvements are claimed without defining what "review complete" means or what evidence proves it.', + ] + ) elif "IT MANAGEMENT" in title_upper: paragraphs.extend( [ @@ -2517,41 +2656,41 @@ def _render_section(section: _SourceSection, *, ctx: _RenderContext) -> str: anchors = _extract_numeric_anchors(section.body, limit=2) if ctx.locale.lower().startswith("fr"): anchor_hint = f" (repères : {', '.join(anchors)})" if anchors else "" + display_title = _compact_title(section.title, max_chars=72) variants = [ - f"Nous sommes alignés sur **{section.title}** comme repère narratif{anchor_hint}, à condition de le traduire en contraintes vérifiables plutôt qu’en langage de confort.", - f"**{section.title}**{anchor_hint} est l’endroit où la crédibilité se fabrique ; le risque « Dave » consiste à en faire une séance de ressenti plutôt qu’une frontière d’application.", - f"Cette partie (**{section.title}**){anchor_hint} sera citée en réunion. Extraire un responsable de décision et une porte de contrôle, pour que ce soit exécutable, et non simplement inspirant.", - f"Dans **{section.title}**{anchor_hint}, on voit le plan devenir « compatible parties prenantes ». La contre-mesure consiste à le retraduire en responsables, échéances et critères de blocage.", - f"**{section.title}**{anchor_hint} est le sanctuaire des hypothèses. Les expliciter maintenant évite de les redécouvrir plus tard, au moment où le calendrier devient émotionnellement complexe.", - f"Nous aimons l’intention de **{section.title}**{anchor_hint}. Le risque pratique : que cela devienne une diapositive ; la contre-mesure : en faire une liste de contrôle avec date de péremption.", - f"**{section.title}**{anchor_hint} promet du réalisme. Rendons-le mesurable : point de départ, écart, et un artefact de preuve qui ne nécessite pas un pèlerinage dans un dossier partagé.", - f"Voici **{section.title}**{anchor_hint} : la partie où nous sommes d’accord en principe. Le geste red-team : s’accorder aussi sur ce qui bloque, ce qui alerte, et qui détient l’exception.", + f"Nous sommes alignés sur **{display_title}** comme repère narratif{anchor_hint}, à condition de le traduire en contraintes vérifiables plutôt qu’en langage de confort.", + f"Cette partie (**{display_title}**){anchor_hint} sera citée en réunion. Extraire un responsable de décision et une porte de contrôle, pour que ce soit exécutable, et non simplement inspirant.", + f"Dans **{display_title}**{anchor_hint}, on voit le plan devenir « compatible parties prenantes ». La contre-mesure consiste à le retraduire en responsables, échéances et critères de blocage.", + f"**{display_title}**{anchor_hint} est le sanctuaire des hypothèses. Les expliciter maintenant évite de les redécouvrir plus tard, au moment où le calendrier devient émotionnellement complexe.", + f"Nous aimons l’intention de **{display_title}**{anchor_hint}. 
Le risque pratique : que cela devienne une diapositive ; la contre-mesure : en faire une liste de contrôle avec date de péremption.", ] else: anchor_hint = f" (notably: {', '.join(anchors)})" if anchors else "" + display_title = _compact_title(section.title, max_chars=72) if ctx.voice == "v1.6": variants = [ - f"**{section.title}**{anchor_hint} will be quoted in meetings. Extract an owner, a gate, and a stop condition so it survives the next review cycle.", - f"Treat **{section.title}**{anchor_hint} as a control surface: define what blocks, what warns, and who owns the exception pathway.", - f"**{section.title}**{anchor_hint} reads like a plan until it meets incentives. Translate it into constraints before it turns into comfort language.", - f"In **{section.title}**{anchor_hint}, the work becomes stakeholder-safe. The counter-move is to make enforcement explicit and exceptions time-bounded.", - f"**{section.title}**{anchor_hint} is where assumptions hide. Name them now, or they will reappear later as “unexpected complexity.”", + f"**{display_title}**{anchor_hint} will be quoted in meetings. Extract an owner, a gate, and a stop condition so it survives the next review cycle.", + f"Treat **{display_title}**{anchor_hint} as a control surface: define what blocks, what warns, and who owns the exception pathway.", + f"**{display_title}**{anchor_hint} reads like a plan until it meets incentives. Translate it into constraints before it turns into comfort language.", + f"In **{display_title}**{anchor_hint}, the work becomes stakeholder-safe. The counter-move is to make enforcement explicit and exceptions time-bounded.", + f"**{display_title}**{anchor_hint} is where assumptions hide. Name them now, or they will reappear later as “unexpected complexity.”", ] else: variants = [ - f"We are aligned on **{section.title}** as a narrative anchor{anchor_hint}, and we recommend turning it into constraints rather than comfort language.", - f"**{section.title}** is where credibility is manufactured{anchor_hint}; the Dave failure mode is to treat it as a vibe check instead of a boundary on applicability.", - f"This section (**{section.title}**){anchor_hint} will be quoted in meetings. Extract one decision owner and one gate so it becomes executable, not inspirational.", - f"In **{section.title}**{anchor_hint}, we can see the plan being translated into stakeholder-safe language. The counter-move is to translate it back into owners, deadlines, and stop conditions.", - f"**{section.title}**{anchor_hint} is the spiritual home of assumptions. Make them explicit now, because they will be rediscovered later when timelines get emotionally complex.", - f"We love the intent behind **{section.title}**{anchor_hint}. The practical risk is that it becomes a slide; the mitigation is to make it a checklist with an expiry date.", - f"**{section.title}**{anchor_hint} reads as a promise of realism. Make realism measurable: baseline, delta, and an evidence artifact that doesn't require a shared drive pilgrimage.", - f"This is **{section.title}**{anchor_hint}: the part where we agree in principle. The red-team ask is that we also agree on what blocks, what warns, and who owns the exception path.", + f"We are aligned on **{display_title}** as a narrative anchor{anchor_hint}, and we recommend turning it into constraints rather than comfort language.", + f"This section (**{display_title}**){anchor_hint} will be quoted in meetings. 
Extract one decision owner and one gate so it becomes executable, not inspirational.", + f"In **{display_title}**{anchor_hint}, we can see the plan being translated into stakeholder-safe language. The counter-move is to translate it back into owners, deadlines, and stop conditions.", + f"**{display_title}**{anchor_hint} is the spiritual home of assumptions. Make them explicit now, because they will be rediscovered later when timelines get emotionally complex.", + f"We love the intent behind **{display_title}**{anchor_hint}. The practical risk is that it becomes a slide; the mitigation is to make it a checklist with an expiry date.", ] paragraphs.append(ctx.pick_unique(kind="paragraph:fallback", key=section.title, variants=variants, used=ctx.used_paragraphs)) - out: list[str] = [f"## {section.title}"] + raw_title = section.title + heading_title = _compact_title(raw_title, max_chars=72) if ctx.voice == "v2.0" else raw_title + out: list[str] = [f"## {heading_title}"] + if heading_title != raw_title: + out.extend(["", f"> {raw_title}"]) if section.why_it_matters: out.extend(["", section.why_it_matters, ""]) else: @@ -2867,6 +3006,158 @@ def _render_action_pack(sections: list[_SourceSection]) -> str: return "\n".join(out).strip() +def _render_action_pack_v2_0(*, sections: list[_SourceSection], normalized_text: str, locale: str) -> str: + """ + IF.DAVE v2.0: reduce Action Pack boilerplate by selecting a small set of representative + sections (3–5) that cover distinct gates where possible. + """ + + candidates = _action_pack_sections(sections) + if not candidates: + return "" + + # Prefer breadth: pick one best section per gate (by body length), preserving first-seen gate order. + by_gate: dict[str, list[_SourceSection]] = {} + gate_order: list[str] = [] + for sec in candidates: + gate = _action_pack_gate(sec) + if gate not in by_gate: + by_gate[gate] = [] + gate_order.append(gate) + by_gate[gate].append(sec) + + selected: list[_SourceSection] = [] + for gate in gate_order: + secs = sorted(by_gate[gate], key=lambda s: len((s.body or "").strip()), reverse=True) + if secs: + selected.append(secs[0]) + if len(selected) >= 5: + break + + # If we have <3 distinct gates, pad with longest remaining sections (still capped at 5). + if len(selected) < 3: + def title_key(sec: _SourceSection) -> str: + return " ".join((sec.title or "").split()).strip().upper() + + seen_titles = {title_key(s) for s in selected} + remaining = [s for s in candidates if s not in selected] + remaining.sort(key=lambda s: len((s.body or "").strip()), reverse=True) + for sec in remaining: + key = title_key(sec) + if key in seen_titles: + continue + selected.append(sec) + seen_titles.add(key) + if len(selected) >= 3: + break + selected = selected[:5] + + out: list[str] = [ + "## Action Pack (Operational)" if not locale.lower().startswith("fr") else "## Action Pack (Opérationnel)", + "", + "This appendix turns the mirror into Monday-morning work: owners, gates, stop conditions, and evidence artifacts." + if not locale.lower().startswith("fr") + else "Cet appendice transforme le miroir en travail exécutable : responsables, portes, critères de blocage, et artefacts de preuve.", + "Keep it generic and auditable; adapt to your tooling without inventing fake implementation details." + if not locale.lower().startswith("fr") + else "Restez générique et auditable ; adaptez à vos outils sans inventer de fausses implémentations.", + "Minimum telemetry schema (when you claim “verifiable signals”): event_type, emitter, freshness_window, owner." 
+ if not locale.lower().startswith("fr") + else "Schéma minimum de télémétrie (quand vous promettez des “signaux vérifiables”) : event_type, emitter, freshness_window, owner.", + "", + "### Control Cards" if not locale.lower().startswith("fr") else "### Cartes de contrôle", + ] + + for sec in selected: + display_title = _compact_title(sec.title, max_chars=72) + gate = _action_pack_gate(sec) + out.extend( + [ + "", + f"#### {display_title}", + "", + ( + f'- **Control objective:** Turn "{display_title}" into an enforceable workflow (not a narrative).' + if not locale.lower().startswith("fr") + else f'- **Objectif de contrôle :** Rendre "{display_title}" opposable (pas seulement narratif).' + ), + f"- **Gate:** {gate}" if not locale.lower().startswith("fr") else f"- **Porte :** {gate}", + f"- **Owner (RACI):** {_action_pack_owner(gate)}" + if not locale.lower().startswith("fr") + else f"- **Responsable (RACI) :** {_action_pack_owner(gate)}", + f"- **Stop condition:** {_action_pack_stop_condition(gate)}" + if not locale.lower().startswith("fr") + else f"- **Critère de blocage :** {_action_pack_stop_condition(gate)}", + f"- **Evidence artifact:** {_action_pack_evidence(gate)}" + if not locale.lower().startswith("fr") + else f"- **Artefact de preuve :** {_action_pack_evidence(gate)}", + ] + ) + + out.extend(["", "### Backlog Export (Jira-ready)" if not locale.lower().startswith("fr") else "### Backlog (Jira-ready)", ""]) + for idx, sec in enumerate(selected, 1): + gate = _action_pack_gate(sec) + display_title = _compact_title(sec.title, max_chars=72) + out.extend( + [ + f"{idx}. [{gate}] {display_title}: define owner, gate, and stop condition" + if not locale.lower().startswith("fr") + else f"{idx}. [{gate}] {display_title} : définir propriétaire, porte, et critère de blocage", + ( + " - Acceptance: owner assigned; stop condition documented and approved." + if not locale.lower().startswith("fr") + else " - Acceptance : responsable assigné ; critère de blocage documenté et approuvé." + ), + ( + " - Acceptance: evidence artifact defined and stored (machine-generated where possible)." + if not locale.lower().startswith("fr") + else " - Acceptance : artefact de preuve défini et stocké (machine-généré si possible)." + ), + ( + " - Acceptance: exceptions require owner + expiry; expiry is enforced automatically." + if not locale.lower().startswith("fr") + else " - Acceptance : exceptions = responsable + expiration ; expiration appliquée automatiquement." + ), + ] + ) + + out.extend( + [ + "", + "### Policy-as-Code Appendix (pseudo-YAML)" if not locale.lower().startswith("fr") else "### Annexe policy-as-code (pseudo-YAML)", + "", + "```yaml", + "gates:", + " pr:", + " - name: \"risk scanning\"", + " stop_condition: \"block on high severity (or unknown)\"", + " evidence: \"scan_event_id + policy_version\"", + " access:", + " - name: \"assistant enablement\"", + " prerequisite: \"device baseline + local scan signal\"", + " stop_condition: \"deny when signals missing\"", + " evidence: \"access_grant_event + prerequisite_check\"", + " runtime:", + " - name: \"tool-use\"", + " prerequisite: \"allowlist + validation\"", + " stop_condition: \"block disallowed actions\"", + " evidence: \"execution_log_id + allowlist_version\"", + "exceptions:", + " expiry_days: 14", + " require_owner: true", + " require_reason: true", + "evidence:", + " freshness_days: 30", + " require_hash: true", + "```", + ] + ) + + # Standards sources: translation table lives in the main body; Action Pack remains minimal and opposable. 
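To make “expiry is enforced automatically” concrete, a small checker driven by the `exceptions` block of the pseudo-YAML above; `owner` and `reason` correspond to `require_owner`/`require_reason`, while `granted_on` is an assumed field name:

```python
import datetime as dt

EXPIRY_DAYS = 14  # mirrors exceptions.expiry_days in the pseudo-YAML

def exception_is_valid(exc: dict, today: dt.date | None = None) -> bool:
    # Expired, ownerless, or reasonless exceptions stop counting as compliant.
    today = today or dt.date.today()
    if not exc.get("owner") or not exc.get("reason"):
        return False
    granted = dt.date.fromisoformat(exc["granted_on"])
    return (today - granted).days <= EXPIRY_DAYS
```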
+ _ = normalized_text + return "\n".join(out).strip() + + def _generate_dave_v1_2_mirror(*, source_text: str, source_path: str, action_pack: bool, locale: str) -> str: today = _dt.date.today().isoformat() normalized = _normalize_ocr(source_text) @@ -2932,7 +3223,8 @@ def _generate_dave_v1_2_mirror(*, source_text: str, source_path: str, action_pac f"## {cover_h1}", ] if cover_h2: - out.extend([f"### {cover_h2}", ""]) + cover_h2_out = _compact_title(cover_h2, max_chars=90) if style_version == "v2.0" else cover_h2 + out.extend([f"### {cover_h2_out}", ""]) else: out.append("") @@ -3068,8 +3360,16 @@ def _generate_dave_v1_3_mirror(*, source_text: str, source_path: str, action_pac f"## {cover_h1}", ] ) + cover_h2_out = "" if cover_h2: - out.extend([f"### {cover_h2}", ""]) + if style_version == "v2.0": + # Avoid rendering body fragments / author blocks as a subtitle. + if not _looks_like_cover_subtitle_noise(cover_h2): + cover_h2_out = _compact_title(cover_h2, max_chars=90) + else: + cover_h2_out = cover_h2 + if cover_h2_out: + out.extend([f"### {cover_h2_out}", ""]) else: out.append("") @@ -3189,8 +3489,15 @@ def _generate_dave_v1_6_mirror(*, source_text: str, source_path: str, action_pac f"## {cover_h1}", ] ) + cover_h2_out = "" if cover_h2: - out.extend([f"### {cover_h2}", ""]) + if style_version == "v2.0": + if not _looks_like_cover_subtitle_noise(cover_h2): + cover_h2_out = _compact_title(cover_h2, max_chars=90) + else: + cover_h2_out = cover_h2 + if cover_h2_out: + out.extend([f"### {cover_h2_out}", ""]) else: out.append("") @@ -3319,8 +3626,15 @@ def _generate_dave_v1_7_mirror(*, source_text: str, source_path: str, action_pac f"## {cover_h1}", ] ) + cover_h2_out = "" if cover_h2: - out.extend([f"### {cover_h2}", ""]) + if style_version == "v2.0": + if not _looks_like_cover_subtitle_noise(cover_h2): + cover_h2_out = _compact_title(cover_h2, max_chars=90) + else: + cover_h2_out = cover_h2 + if cover_h2_out: + out.extend([f"### {cover_h2_out}", ""]) else: out.append("") @@ -3428,9 +3742,13 @@ def _extract_claim_lines(*, normalized_text: str, max_items: int = 12) -> list[s return False # Avoid internal extraction artifacts and navigation noise. lower = s.lower() + # Avoid low-signal page headers like "… | Datasheet 1". + if "datasheet" in lower: + return False if "trace id" in lower: return False - if lower.startswith("http://") or lower.startswith("https://"): + # Claims register is for measurable statements, not links (URLs cause broken rendering/quoting). + if "http://" in lower or "https://" in lower: return False if lower in {"markdown content:", "url source:"}: return False @@ -3442,7 +3760,18 @@ def _extract_claim_lines(*, normalized_text: str, max_items: int = 12) -> list[s for ln in lines: if not keep(ln): continue - if not re.search(r"\d", ln) and "%" not in ln and "$" not in ln: + if not re.search(r"\d", ln) and "%" not in ln and "$" not in ln and "€" not in ln and "£" not in ln: + continue + # Numeric guardrail: ignore lines where the only digits are untyped singletons (e.g., "Datasheet 1"). + lower = ln.lower() + has_multi_digit = bool(re.search(r"\b\d{2,}\b", ln)) + has_typed_singleton = bool( + re.search( + r"\b\d+\s*(?:ms|s|sec|secs|seconds|min|mins|minutes|hour|hours|day|days|x|gb|tb|mb|kb)\b", + lower, + ) + ) + if not ("%" in ln or "$" in ln or "€" in ln or "£" in ln or has_multi_digit or has_typed_singleton): continue # Skip obviously broken glyph runs. 
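A condensed restatement of the numeric guardrail above, for quick sanity checks; the unit list matches the regex in the diff, and the asserts show the intended split between typed measurements and untyped singletons like page headers:

```python
import re

def has_measurable_number(line: str) -> bool:
    # Keep currency/percent, multi-digit numbers, and typed singletons ("35 ms").
    if any(sym in line for sym in ("%", "$", "€", "£")):
        return True
    if re.search(r"\b\d{2,}\b", line):
        return True
    units = r"ms|s|sec|secs|seconds|min|mins|minutes|hour|hours|day|days|x|gb|tb|mb|kb"
    return bool(re.search(rf"\b\d+\s*(?:{units})\b", line.lower()))

assert has_measurable_number("Scan budget dropped to 35 ms")
assert not has_measurable_number("Datasheet 1")  # untyped singleton, rejected
```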
if sum(1 for ch in ln if " " <= ch <= "~") < max(8, int(len(ln) * 0.5)): @@ -3508,6 +3837,404 @@ def _render_translation_table(*, normalized_text: str, locale: str) -> str: return "\n".join(out).strip() +def _normalize_mermaid_block(text: str) -> str: + return "\n".join([ln.rstrip() for ln in str(text).strip().splitlines() if ln.strip()]) + + +def _diagram_label(diagram: str, *, locale: str) -> str: + d = diagram.lower() + if "pendingreview" in d or "exception" in d: + return "Stase d’exception" if locale.lower().startswith("fr") else "Exception stasis" + if "screenshot" in d or "artifact" in d or "evidence" in d or "attestation" in d: + return "Boucle de dérive de preuve" if locale.lower().startswith("fr") else "Evidence drift loop" + if "policy decision point" in d or "pdp" in d or "policy enforcement point" in d or "pep" in d: + return "Chaîne PDP/PEP" if locale.lower().startswith("fr") else "PDP/PEP chain" + if "rag store" in d or ("llm" in d and "tools" in d): + return "Architecture LLM + outils" if locale.lower().startswith("fr") else "LLM toolchain architecture" + if "questionnaire" in d or "vendor" in d or "third-party" in d: + return "Boucle tiers / fournisseurs" if locale.lower().startswith("fr") else "Third‑party loop" + return "Boucle de contrôle (inférée)" if locale.lower().startswith("fr") else "Inferred control loop" + + +def _apply_dave_v2_0_postprocess(md: str, *, locale: str) -> str: + """ + IF.DAVE v2.0 hardening pass: + - Limit Dave Factor callouts to 1–2 per dossier (keep highest-signal sections). + - Deduplicate Mermaid diagrams: render each unique diagram once in an Annex and reference it in-body. + """ + + lines = md.splitlines() + + # Identify the footer separator (last '---') so we can insert an Annex above it. + footer_sep_idx = None + in_fence = False + for idx, ln in enumerate(lines): + stripped = ln.strip() + if stripped.startswith("```"): + in_fence = not in_fence + continue + if in_fence: + continue + if stripped == "---": + footer_sep_idx = idx + if footer_sep_idx is None: + footer_sep_idx = len(lines) + + # 1) Callout budget: find callout blocks and keep up to 2. 
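Step 2 of this pass dedupes diagrams by their normalized text, so purely cosmetic whitespace differences collapse into one Annex entry; a tiny check against `_normalize_mermaid_block` as defined above:

```python
a = "flowchart TD\n  A --> B\n\n"
b = "flowchart TD  \n\n  A --> B"
# Trailing spaces and blank lines are stripped, so both map to one annex entry.
assert _normalize_mermaid_block(a) == _normalize_mermaid_block(b)
```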
+ callouts: list[dict] = [] + current_section = "" + i = 0 + while i < len(lines): + ln = lines[i] + stripped = ln.strip() + if stripped.startswith("## "): + current_section = stripped[3:].strip() + is_callout = stripped.startswith("> **The Dave Factor:**") or stripped.startswith("> **Le facteur Dave :**") + if is_callout: + start = i + j = i + while j < len(lines) and lines[j].strip().startswith(">"): + j += 1 + block_text = "\n".join(lines[start:j]).strip() + callouts.append({"start": start, "end": j, "section": current_section, "text": block_text}) + i = j + continue + i += 1 + + def score_callout(section_title: str, text: str) -> int: + s = (section_title or "").upper() + t = (text or "").lower() + score = 0 + if any(k in s for k in ["REQUEST EVIDENCE", "ACCESS REQUEST", "LOCAL SECURITY", "REQUEST EVIDENCE"]): + score += 120 + if "screenshot" in t or "attestation" in t: + score += 110 + if "audit" in s or "compliance" in s: + score += 95 + if "training" in s or "quiz" in t or "certificate" in t: + score += 75 + if any(k in t for k in ["fips", "piv", "fido", "aal"]): + score += 70 + if "roadmap" in t or "alignment session" in t: + score += 25 + return score + + keep_callouts: set[int] = set() + if callouts: + ranked = sorted( + enumerate(callouts), + key=lambda it: (-score_callout(it[1]["section"], it[1]["text"]), it[1]["start"]), + ) + seen_hashes: set[str] = set() + for idx, c in ranked: + sig_lines: list[str] = [] + for ln in str(c["text"]).splitlines(): + s = ln.strip() + if s.startswith("> **The Dave Factor:**") or s.startswith("> **Le facteur Dave :**"): + sig_lines.append(s) + if s.startswith("> **Countermeasure:**") or s.startswith("> **Contre-mesure :**"): + sig_lines.append(s) + if len(sig_lines) >= 2: + break + signature = "\n".join(sig_lines).strip() or str(c["text"]).strip() + h = _sha256_text(signature) + if h in seen_hashes: + continue + keep_callouts.add(idx) + seen_hashes.add(h) + if len(keep_callouts) >= 2: + break + + # 2) Mermaid dedupe: collect all mermaid code fences, remove in-body blocks and replace with references. + diagrams: list[dict] = [] + current_section = "" + in_other_fence = False + i = 0 + while i < len(lines): + stripped = lines[i].strip() + if stripped.startswith("```") and stripped != "```mermaid": + in_other_fence = not in_other_fence + i += 1 + continue + if in_other_fence: + i += 1 + continue + if stripped.startswith("## "): + current_section = stripped[3:].strip() + i += 1 + continue + if stripped == "```mermaid": + start = i + j = i + 1 + while j < len(lines) and lines[j].strip() != "```": + j += 1 + end = min(len(lines), j + 1) + raw = "\n".join(lines[start + 1 : j]) + norm = _normalize_mermaid_block(raw) + if norm: + diagrams.append( + { + "start": start, + "end": end, + "section": current_section, + "raw": raw.rstrip(), + "norm": norm, + } + ) + i = end + continue + i += 1 + + unique_diagrams: list[dict] = [] + seen: set[str] = set() + for dgm in diagrams: + if dgm["norm"] in seen: + continue + seen.add(dgm["norm"]) + unique_diagrams.append(dgm) + + # Guarantee at least two diagrams by adding safe inferred defaults if needed. 
+    def ensure_default_diagram(content: str) -> None:
+        norm = _normalize_mermaid_block(content)
+        if not norm or norm in seen:
+            return
+        seen.add(norm)
+        unique_diagrams.append({"raw": content.rstrip(), "norm": norm, "section": ""})
+
+    if len(unique_diagrams) < 2:
+        ensure_default_diagram(
+            "\n".join(
+                [
+                    "flowchart TD",
+                    '    A["Control intent"] --> B["Manual evidence requested"]',
+                    '    B --> C["Artifact produced"]',
+                    '    C --> D["Dashboard goes green"]',
+                    '    D --> E["Exceptions accumulate"]',
+                    '    E --> F["Definition of compliance shifts"]',
+                    "    F --> B",
+                ]
+            )
+        )
+    if len(unique_diagrams) < 2:
+        ensure_default_diagram(
+            "\n".join(
+                [
+                    "stateDiagram-v2",
+                    "    [*] --> Requested",
+                    '    Requested --> PendingReview: "needs alignment"',
+                    "    PendingReview --> PendingReview: renewal",
+                    "    PendingReview --> Approved: silence",
+                    '    Approved --> Approved: "temporary" extension',
+                ]
+            )
+        )
+
+    # Build stable diagram names.
+    label_counts: dict[str, int] = {}
+    diagram_names: dict[str, str] = {}
+    for dgm in unique_diagrams:
+        label = _diagram_label(dgm.get("raw", ""), locale=locale)
+        label_counts[label] = label_counts.get(label, 0) + 1
+    used_seq: dict[str, int] = {}
+    for dgm in unique_diagrams:
+        label = _diagram_label(dgm.get("raw", ""), locale=locale)
+        used_seq[label] = used_seq.get(label, 0) + 1
+        suffix = f" ({used_seq[label]})" if label_counts.get(label, 0) > 1 else ""
+        diagram_names[dgm["norm"]] = f"{label}{suffix}"
+
+    # Rebuild document: remove callouts beyond budget, remove mermaid blocks, and insert Annex before footer.
+    remove_ranges: list[tuple[int, int]] = []
+    for idx, c in enumerate(callouts):
+        if idx not in keep_callouts:
+            remove_ranges.append((c["start"], c["end"]))
+    for dgm in diagrams:
+        # Remove the optional diagram heading directly above inferred diagrams (best effort).
+        start = dgm["start"]
+        maybe_heading = start - 2
+        if maybe_heading >= 0:
+            h0 = lines[maybe_heading].strip()
+            h1 = lines[maybe_heading + 1].strip() if maybe_heading + 1 < len(lines) else ""
+            if h0.startswith("###") and "diagram" in h0.lower() and h1 == "":
+                start = maybe_heading
+        remove_ranges.append((start, dgm["end"]))
+
+    # Merge / normalize ranges.
+    remove_ranges.sort()
+    merged: list[tuple[int, int]] = []
+    for start, end in remove_ranges:
+        if not merged or start > merged[-1][1]:
+            merged.append((start, end))
+        else:
+            merged[-1] = (merged[-1][0], max(merged[-1][1], end))
+
+    out_lines: list[str] = []
+    i = 0
+    range_idx = 0
+    referenced: set[str] = set()
+    while i < len(lines):
+        if range_idx < len(merged) and i == merged[range_idx][0]:
+            end = merged[range_idx][1]
+            # If this range was a diagram, replace it with a localized reference line
+            # (based on the diagram norm if we can find it).
+            # Best effort: find the mermaid start inside this range.
+            ref = None
+            for dgm in diagrams:
+                if dgm["start"] >= merged[range_idx][0] and dgm["end"] <= merged[range_idx][1]:
+                    name = diagram_names.get(dgm["norm"])
+                    if name:
+                        if dgm["norm"] not in referenced:
+                            ref = f"Voir annexe : {name}." if locale.lower().startswith("fr") else f"See Annex: {name}."
+                            referenced.add(dgm["norm"])
+                    break
+            if ref:
+                out_lines.append(ref)
+                out_lines.append("")
+            i = end
+            range_idx += 1
+            continue
+        out_lines.append(lines[i])
+        i += 1
+
+    # Remove empty/legacy inferred-diagram annex headings (we insert our own).
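+    # The sweep below strips anything from a legacy annex title up to the next
+    # "---" or "## " heading, so re-running the post-process on an already
+    # processed dossier stays idempotent instead of stacking annexes.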
+    legacy_annex_titles = {
+        "## Annex (inferred diagrams)",
+        "## Annexes (diagrammes inférés)",
+    }
+    cleaned: list[str] = []
+    in_legacy_annex = False
+    for ln in out_lines:
+        stripped = ln.strip()
+        if stripped in legacy_annex_titles:
+            in_legacy_annex = True
+            continue
+        if in_legacy_annex and stripped == "---":
+            in_legacy_annex = False
+            cleaned.append(ln)
+            continue
+        if in_legacy_annex and stripped.startswith("## "):
+            in_legacy_annex = False
+        if in_legacy_annex:
+            continue
+        cleaned.append(ln)
+
+    # Insert annex right before footer separator.
+    footer_sep_idx = None
+    in_fence = False
+    for idx, ln in enumerate(cleaned):
+        stripped = ln.strip()
+        if stripped.startswith("```"):
+            in_fence = not in_fence
+            continue
+        if in_fence:
+            continue
+        if stripped == "---":
+            footer_sep_idx = idx
+    if footer_sep_idx is None:
+        footer_sep_idx = len(cleaned)
+
+    if locale.lower().startswith("fr"):
+        annex_title = "## Annexes (actifs partagés)"
+        annex_note = "_Diagrammes dédupliqués : chaque schéma unique est rendu une fois ici ; les sections y renvoient._"
+        diag_title = "### Diagrammes (dédupliqués)"
+    else:
+        annex_title = "## Annex (shared assets)"
+        annex_note = "_Deduped diagrams: each unique diagram is rendered once here; sections reference it by name._"
+        diag_title = "### Diagrams (deduped)"
+
+    annex_lines = [annex_title, "", annex_note, "", diag_title, ""]
+    for dgm in unique_diagrams:
+        name = diagram_names.get(dgm["norm"]) or _diagram_label(dgm.get("raw", ""), locale=locale)
+        annex_lines.extend([f"#### {name}", "", "```mermaid", dgm.get("raw", "").rstrip(), "```", ""])
+
+    final_lines = cleaned[:footer_sep_idx] + [""] + annex_lines + cleaned[footer_sep_idx:]
+    return "\n".join([ln.rstrip() for ln in final_lines]).strip() + "\n"
+
+
+def _merge_consecutive_sections_by_title(sections: list[_SourceSection]) -> list[_SourceSection]:
+    """
+    Extraction sometimes yields many page-level "sections" with the same repeated header title.
+    For v2.0+ we merge consecutive equal-titled sections to prevent template repetition.
+    """
+
+    if len(sections) < 3:
+        return sections
+
+    def norm(title: str) -> str:
+        s = " ".join((title or "").split()).strip()
+        s = re.sub(r"https?://\S+", "", s).strip()
+        s = re.sub(r"(?i)\bthis publication is available free of charge from\b:?.*$", "", s).strip()
+        s = " ".join(s.split()).strip()
+        return s.upper()
+
+    merged: list[_SourceSection] = [sections[0]]
+    for sec in sections[1:]:
+        if merged and norm(sec.title) == norm(merged[-1].title):
+            prev = merged[-1]
+            body = "\n\n".join([prev.body.strip(), sec.body.strip()]).strip()
+            why_prev = (prev.why_it_matters or "").strip()
+            why_new = (sec.why_it_matters or "").strip()
+            why = why_prev or why_new or None
+            if why_prev and why_new and why_new not in why_prev:
+                why = "\n".join([why_prev, why_new]).strip()
+            merged[-1] = _SourceSection(title=prev.title, body=body, why_it_matters=why)
+        else:
+            merged.append(sec)
+    return merged
+
+
+def _merge_repeated_titles_globally(sections: list[_SourceSection], *, min_repeats: int = 3) -> list[_SourceSection]:
+    """
+    If a title repeats many times across extracted sections, it's usually a page header.
+    Merge all instances into the first occurrence to avoid template repetition.
+ """ + + if len(sections) < 3: + return sections + + def norm(title: str) -> str: + s = " ".join((title or "").split()).strip() + s = re.sub(r"https?://\\S+", "", s).strip() + s = re.sub(r"(?i)\\bthis publication is available free of charge from\\b:?.*$", "", s).strip() + s = " ".join(s.split()).strip() + return s.upper() + + counts: dict[str, int] = {} + for sec in sections[1:]: + key = norm(sec.title) + if not key: + continue + counts[key] = counts.get(key, 0) + 1 + + repeated = {k for k, n in counts.items() if n >= min_repeats} + if not repeated: + return sections + + out: list[_SourceSection] = [sections[0]] + merged_by_title: dict[str, _SourceSection] = {} + out_idx_by_title: dict[str, int] = {} + for sec in sections[1:]: + key = norm(sec.title) + if key in repeated: + if key not in out_idx_by_title: + out_idx_by_title[key] = len(out) + merged_by_title[key] = sec + out.append(sec) + else: + out_idx = out_idx_by_title[key] + prev = merged_by_title[key] + body = "\n\n".join([prev.body.strip(), sec.body.strip()]).strip() + why_prev = (prev.why_it_matters or "").strip() + why_new = (sec.why_it_matters or "").strip() + why = why_prev or why_new or None + if why_prev and why_new and why_new not in why_prev: + why = "\n".join([why_prev, why_new]).strip() + merged = _SourceSection(title=prev.title, body=body, why_it_matters=why) + merged_by_title[key] = merged + out[out_idx] = merged + else: + out.append(sec) + return out + + def _generate_dave_v1_8_mirror(*, source_text: str, source_path: str, action_pack: bool, locale: str, style_version: str = "v1.8") -> str: today = _dt.date.today().isoformat() normalized = _normalize_ocr(source_text) @@ -3519,6 +4246,9 @@ def _generate_dave_v1_8_mirror(*, source_text: str, source_path: str, action_pac action_pack_enabled = (not _truthy_env("REVOICE_NO_ACTION_PACK")) or bool(action_pack) or _truthy_env("REVOICE_ACTION_PACK") sections = _extract_sections(normalized) + if style_version == "v2.0": + sections = _merge_consecutive_sections_by_title(sections) + sections = _merge_repeated_titles_globally(sections) if not sections: raise ValueError("No content extracted from source") if len(sections) == 1: @@ -3616,8 +4346,15 @@ def _generate_dave_v1_8_mirror(*, source_text: str, source_path: str, action_pac f"## {cover_h1}", ] ) + cover_h2_out = "" if cover_h2: - out.extend([f"### {cover_h2}", ""]) + if style_version == "v2.0": + if not _looks_like_cover_subtitle_noise(cover_h2): + cover_h2_out = _compact_title(cover_h2, max_chars=90) + else: + cover_h2_out = cover_h2 + if cover_h2_out: + out.extend([f"### {cover_h2_out}", ""]) else: out.append("") @@ -3663,7 +4400,10 @@ def _generate_dave_v1_8_mirror(*, source_text: str, source_path: str, action_pac out.extend([table, ""]) if action_pack_enabled: - out.append(_render_action_pack(sections[1:])) + if style_version == "v2.0": + out.append(_render_action_pack_v2_0(sections=sections[1:], normalized_text=normalized, locale=locale)) + else: + out.append(_render_action_pack(sections[1:])) out.append("") # v1.8+ requires >=2 Mermaid diagrams; add supplemental inferred diagrams only when needed. 
@@ -3735,4 +4475,7 @@ def _generate_dave_v1_8_mirror(*, source_text: str, source_path: str, action_pac
         ]
     )
 
-    return "\n".join(out).strip() + "\n"
+    doc = "\n".join(out).strip() + "\n"
+    if style_version == "v2.0":
+        return _apply_dave_v2_0_postprocess(doc, locale=locale)
+    return doc
diff --git a/tools/week_pack/build_week.py b/tools/week_pack/build_week.py
index 47bdc07..36ab366 100644
--- a/tools/week_pack/build_week.py
+++ b/tools/week_pack/build_week.py
@@ -130,14 +130,32 @@ def _revoice_preflight(*, style: str, md_path: Path, source_path: Path) -> str:
 
 
 def _extract_first_claim(md: str) -> str:
+    claims: list[str] = []
     for line in md.splitlines():
         m = re.match(r"^- The source claims: [“\"](?P<q>.+?)[”\"]\s*$", line.strip())
         if m:
-            claim = m.group("q").strip()
-            if len(claim) > 160:
-                return claim[:157].rstrip() + "…"
-            return claim
-    return ""
+            claims.append(m.group("q").strip())
+
+    if not claims:
+        return ""
+
+    def is_low_signal(claim: str) -> bool:
+        c = (claim or "").strip()
+        lower = c.lower()
+        if "datasheet" in lower:
+            return True
+        if "all rights reserved" in lower or "copyright" in lower:
+            return True
+        # Very short fragments tend to be headers/footers or OCR junk.
+        if len(c) < 40:
+            return True
+        return False
+
+    # Prefer the first non-noise claim; fall back to the first claim if all are noisy.
+    chosen = next((c for c in claims if not is_low_signal(c)), claims[0])
+    if len(chosen) > 160:
+        return chosen[:157].rstrip() + "…"
+    return chosen
 
 
 def _extract_first_dave_factor(md: str) -> str:
@@ -177,7 +195,8 @@ def _write_marketing(
     day_upper = day.day.upper()
     next_label = f"{next_day.day.upper()} — {next_day.edition} {next_day.hashtag}" if next_day else "Next week: new drops."
     dave_factor = _extract_first_dave_factor(dossier_md) or "The control drifts into a status update, and the status update becomes the control."
-    claim = _extract_first_claim(dossier_md) or "(no short claim extracted)"
+    claim = _extract_first_claim(dossier_md)
+    quote_bullet = f"- The source claims: “{claim}”" if claim else "- (Add one short measurable source quote from the dossier’s Claims Register.)"
 
     lines = [
         f"# Thread Pack — {day_upper} ({day.edition} Edition)",
@@ -207,7 +226,7 @@ def _write_marketing(
         "",
         "## Post 3 — The Source Claims (quote-budget)",
         "",
-        f"- The source claims: “{claim}”",
+        quote_bullet,
         "",
         "## Post 4 — Sponsor Bumper (mid-thread)",
         "",
@@ -602,14 +621,14 @@ def main() -> int:
     )
 
     # Week index + full pack.
-    m = re.search(r"(v\\d+(?:\\.\\d+)*)", args.style)
+    m = re.search(r"(v\d+(?:\.\d+)*)", args.style)
     week_title = m.group(1) if m else args.style
     index_path = build_dir / "index.md"
     _write_week_index(out_path=index_path, week_title=week_title, base_url=args.base_url, days=ordered, source_links=source_links)
 
     week_pack_path = build_dir / "week.pack.md"
     body_parts = [
-        "# InfraFabric External Review Pack — Full Week (v1.9)",
+        f"# InfraFabric External Review Pack — Full Week ({week_title})",
         "",
         "This file embeds all daily packs for sandboxed review environments. Review one day at a time.",
         "",
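
The new claim selection can be sanity-checked in isolation, without running a full week build. A small illustrative run (the quotes are invented, not from any real dossier):

```python
# Illustrative only: the first quote is a short "datasheet" fragment and the
# second is copyright boilerplate, so both are scored low-signal and skipped.
md = "\n".join([
    "- The source claims: “Datasheet”",
    "- The source claims: “Copyright 2025. All rights reserved.”",
    "- The source claims: “Verifiers SHALL store memorized secrets in a form that is resistant to offline attacks.”",
])

print(_extract_first_claim(md))
# -> the third quote, the first claim that survives is_low_signal().
```

If every extracted claim is noisy, the helper falls back to the first claim rather than returning nothing; and when no claim is extracted at all, `_write_marketing` now emits an explicit placeholder bullet instead of publishing “(no short claim extracted)” as if it were a quote.
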