IF.TTT: improve reviewability + thread pack extraction

danny 2025-12-28 22:05:06 +00:00
parent bac86571a8
commit b644130412
4 changed files with 1206 additions and 63 deletions

View file

@ -22,11 +22,52 @@ There is a sync job that mirrors `https://git.infrafabric.io/danny/hosted.git` i
**Important:** The sync uses `rsync --delete`, so anything not in the mirrored repo would normally be removed. To keep operator-generated review artifacts stable, the sync script now excludes:
- `bibles/`
- `review/`
- `iftrace.py` (operator-maintained; don't overwrite via repo sync)
So **publish operator-generated bibles/review packs under** (a minimal sync sketch follows this list):
- `/srv/hosted-static/public/bibles/…`
- `/srv/hosted-static/public/review/…`
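A minimal sketch of the exclusion behaviour, assuming the sync job shells out to `rsync` (the wrapper function, source path, and destination default below are illustrative, not the real script):

```python
# Illustrative sync wrapper: mirror the repo checkout into the hosted-static tree,
# but never overwrite or delete the operator-maintained paths listed above.
import subprocess

OPERATOR_PATHS = ["bibles/", "review/", "iftrace.py"]

def sync_hosted_static(src: str, dst: str = "/srv/hosted-static/public/") -> None:
    cmd = ["rsync", "-a", "--delete"]
    # Excluded paths are skipped on copy and also protected from --delete on the receiver.
    cmd += [f"--exclude={p}" for p in OPERATOR_PATHS]
    subprocess.run(cmd + [src, dst], check=True)
```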
## HTML-only sandbox fallback (new; critical for external reviewers)
Some LLM “web fetchers” can load HTML but fail on `.md/.py/.tar.gz`. To keep the IF.TTT “open governance” premise intact for external review:
- Keep the raw assets (`.md`, `.tar.gz`) **and** provide an **HTML view** on the same stable alias surface.
- Share surface:
- Raw pack: `/static/pack/<shareId>.md`
- HTML pack view: `/static/pack/<shareId>`
- Raw review pack: `/static/review/<shareId>.md` (alt: `/static/review-pack/<shareId>.md`)
- HTML review pack view: `/static/review/<shareId>` (alt: `/static/review-pack/<shareId>`)
- Raw marketing: `/static/marketing/<shareId>.md`
- HTML marketing view: `/static/marketing/<shareId>`
- Hosted review artifacts (`/static/hosted/review/**`) also have `.html` wrappers generated post-sync.
Implementation notes (a fetch-fallback sketch follows):
- Caddy rewrites `/static/*` (HTML view endpoints) to the red-team app (`pct 212`).
- Hosted `.html` wrappers are generated by `pct 210:/usr/local/bin/hosted_static_build_html_wrappers.py` after each sync.
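A minimal client-side sketch of the fallback behaviour, assuming a review helper that prefers the raw pack and drops to the HTML view when the raw asset is refused (the helper itself is illustrative; the URL shapes are the ones listed above):

```python
# Illustrative fetch helper: try the raw Markdown pack first, then the HTML view.
from urllib.request import Request, urlopen
from urllib.error import HTTPError, URLError

BASE = "https://infrafabric.io"

def fetch_pack(share_id: str) -> tuple[str, str]:
    """Return (url_used, body) for a shareId, preferring the raw .md surface."""
    candidates = [
        f"{BASE}/static/pack/{share_id}.md",  # raw, download-friendly
        f"{BASE}/static/pack/{share_id}",     # HTML view for HTML-only sandboxes
    ]
    last_error = None
    for url in candidates:
        try:
            with urlopen(Request(url), timeout=10) as resp:
                return url, resp.read().decode("utf-8", errors="replace")
        except (HTTPError, URLError) as exc:
            last_error = exc
    raise RuntimeError(f"all pack surfaces failed for {share_id}: {last_error}")
```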
## Full stack + links (operator reference)
- `/root/docs/19-ifttt-full-stack-and-working-links.md` is the “single page” reference for:
- Which apps run where (pct IDs + IPs)
- Which URLs are canonical for sharing
- Copy/paste-safe example links
- IF.TTT public overview page (hosted-static): https://infrafabric.io/static/hosted/ifttt/
## IF.TTT paper update review pack (known-good example)
Use this pack when requesting external critique of the IF.TTT paper update (receipt-first chronology + public receipts + triage bundles):
- Landing: `https://infrafabric.io/static/hosted/review/ifttt-paper-update/2025-12-28/`
- Pack (MD): `https://infrafabric.io/static/hosted/review/ifttt-paper-update/2025-12-28/review-pack.md`
- Pack (HTML): `https://infrafabric.io/static/hosted/review/ifttt-paper-update/2025-12-28/review-pack.html`
- Pack (tar.gz): `https://infrafabric.io/static/hosted/review/ifttt-paper-update/2025-12-28/review-pack.tar.gz`
- Pack hash: `https://infrafabric.io/static/hosted/review/ifttt-paper-update/2025-12-28/review-pack.tar.gz.sha256`
- Triage selector demo (canonical): `https://infrafabric.io/static/hosted/review/trace-bundles/d70ed99a/index.md`
- Offline verifier: `https://infrafabric.io/static/hosted/iftrace.py`
Note: some LLM “web fetchers” reject `.tar.gz` with a client-side `415` even when browsers/curl succeed; use the `.html` pack in those environments.
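For environments that can fetch the tarball, a minimal local check of the published hash (stdlib only; this is a convenience sketch, not a replacement for the offline verifier above):

```python
# Illustrative check: hash review-pack.tar.gz and compare it to the published .sha256.
import hashlib
from urllib.request import urlopen

BASE = "https://infrafabric.io/static/hosted/review/ifttt-paper-update/2025-12-28"

def verify_pack_hash() -> bool:
    with urlopen(f"{BASE}/review-pack.tar.gz", timeout=30) as resp:
        digest = hashlib.sha256(resp.read()).hexdigest()
    with urlopen(f"{BASE}/review-pack.tar.gz.sha256", timeout=30) as resp:
        # .sha256 files are typically "<hex>  <filename>"; keep only the hex field.
        expected = resp.read().decode("utf-8").split()[0].lower()
    return digest == expected

print("PASS" if verify_pack_hash() else "FAIL")
```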
## Week review packs (v1.8)
Week v1.8 packs are published here:

View file

@ -8,6 +8,24 @@ import MarkdownIt from "markdown-it";
import express from "express";
import multer from "multer";
/*
Public, no-login receipt surface (IF.TTT)
----------------------------------------
This server exposes Shadow Dossiers and their "receipt" artifacts via two parallel
representations:
- Raw (download-friendly): `*.md` (and tarballs elsewhere)
- HTML views: same path without the `.md` suffix
Rationale: some external review environments (including certain LLM "web fetchers")
reliably load `text/html` but may reject "downloadable" assets like `.md/.py/.tar.gz`.
Keeping both surfaces makes the governance/receipt premise reviewable by humans *and*
restricted sandboxes.
Deployment detail: the stable public aliases live under `/static/*` on the public
domain and are reverse-proxied here (see operator docs: `/root/docs/17-ifttt-public-receipt-surface.md`).
*/
const __filename = url.fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
@ -145,6 +163,7 @@ function renderTraceHeaderHtml({ verification, job }) {
const createdAt = job?.createdAt ? String(job.createdAt) : "";
const traceId = String(job?.id || "");
const style = String(job?.style || "");
const ttt = job?._ttt_trace_receipt || null;
const checks = verification?.checks || {};
const outputOk = checks.outputOk === true;
@ -154,6 +173,11 @@ function renderTraceHeaderHtml({ verification, job }) {
const outputLabel = outputOk ? "PASS" : "FAIL";
const sourceLabel = sourceOk === true ? "PASS" : sourceOk === false ? "FAIL" : "UNKNOWN";
const quantumReady = Boolean(ttt && ttt.quantum_ready === true);
const pqAlgo = ttt && ttt.pq_algo ? String(ttt.pq_algo) : "";
const pqStatus = ttt && ttt.pq_status ? String(ttt.pq_status) : "";
const pqLabel = quantumReady ? `READY${pqAlgo ? ` (${pqAlgo})` : ""}` : ttt ? "ABSENT" : "UNKNOWN";
const safeCreatedAt = createdAt ? escapeHtml(createdAt) : "";
const safeTraceId = traceId ? escapeHtml(traceId) : "";
const safeStyle = style ? escapeHtml(style) : "";
@ -171,6 +195,7 @@ function renderTraceHeaderHtml({ verification, job }) {
` <ul class="trace-checks">`,
` <li>Output hash check: <strong>${escapeHtml(outputLabel)}</strong></li>`,
` <li>Source hash check: <strong>${escapeHtml(sourceLabel)}</strong></li>`,
` <li>Quantum-ready receipt: <strong>${escapeHtml(pqLabel)}</strong>${pqStatus ? ` <small>(${escapeHtml(pqStatus)})</small>` : ""}</li>`,
` <li>Quality warnings: <strong>${warningsPresent ? "present" : "none recorded"}</strong></li>`,
` </ul>`,
`</div>`,
@ -258,11 +283,30 @@ function renderTraceMarkdown({ shareId, job, publicBaseUrl, staticPublicBaseUrl
const createdAt = job?.createdAt ? String(job.createdAt) : "";
const status = job?.status ? String(job.status) : "";
const warningsPresent = Boolean(job?.warnings && String(job.warnings).trim());
const ttt = job?._ttt_trace_receipt || job?.tttTraceReceipt || null;
const tttId = ttt && ttt.id ? String(ttt.id) : "";
const tttHash = ttt && ttt.content_hash ? String(ttt.content_hash) : "";
const pqReady = Boolean(ttt && ttt.quantum_ready === true);
const pqAlgo = ttt && ttt.pq_algo ? String(ttt.pq_algo) : "";
const pqStatus = ttt && ttt.pq_status ? String(ttt.pq_status) : "";
const traceId = String(job?.id || "").trim();
const tracePrefixRaw = traceId ? traceId.split("-")[0] : "";
const tracePrefix = /^[0-9a-f]{8}$/i.test(tracePrefixRaw) ? tracePrefixRaw.toLowerCase() : "";
const triageSelectorUrl = tracePrefix
? `${primaryBase}/static/hosted/review/trace-bundles/${encodeURIComponent(tracePrefix)}/index.html`
: "";
const triageSelectorUrlRaw = tracePrefix
? `${primaryBase}/static/hosted/review/trace-bundles/${encodeURIComponent(tracePrefix)}/index.md`
: "";
const dossierUrl = `${primaryBase}/static/dossier/${encodeURIComponent(shareId)}`;
const traceUrl = `${primaryBase}/static/trace/${encodeURIComponent(shareId)}`;
const downloadUrl = `${primaryBase}/static/dossier/${encodeURIComponent(shareId)}/download`;
const packUrl = `${primaryBase}/static/pack/${encodeURIComponent(shareId)}.md`;
const packHtmlUrl = `${primaryBase}/static/pack/${encodeURIComponent(shareId)}`;
const reviewHtmlUrl = `${primaryBase}/static/review/${encodeURIComponent(shareId)}`;
const marketingHtmlUrl = `${primaryBase}/static/marketing/${encodeURIComponent(shareId)}`;
const sourceUrl = job?.sourceSha256
? `${primaryBase}/static/source/${job.sourceSha256}${path.extname(job.sourcePath || "").toLowerCase()}`
: "";
@ -272,6 +316,9 @@ function renderTraceMarkdown({ shareId, job, publicBaseUrl, staticPublicBaseUrl
const directTraceUrl = `${directBase}/r/${encodeURIComponent(shareId)}/trace`;
const directDownloadUrl = `${directBase}/r/${encodeURIComponent(shareId)}/download`;
const directPackUrl = `${directBase}/r/${encodeURIComponent(shareId)}/pack.md`;
const directPackHtmlUrl = `${directBase}/r/${encodeURIComponent(shareId)}/pack`;
const directReviewHtmlUrl = `${directBase}/r/${encodeURIComponent(shareId)}/review-pack`;
const directMarketingHtmlUrl = `${directBase}/r/${encodeURIComponent(shareId)}/marketing`;
const lastResortBase = normalizeBaseUrl(publicBaseUrl);
const lastResortDossierUrl = lastResortBase ? `${lastResortBase}/r/${encodeURIComponent(shareId)}` : "";
@ -289,6 +336,9 @@ function renderTraceMarkdown({ shareId, job, publicBaseUrl, staticPublicBaseUrl
"- You can independently verify the downloaded dossier Markdown by hashing it and comparing to `Output sha256` below.",
"- You can independently verify the hosted source file (if present) by hashing it and comparing to `Source sha256` below.",
"- This page binds those two fingerprints together as a single public evidence record.",
pqReady
? `- This trace also has a **Quantum-ready** signed receipt record (${pqAlgo || "PQ"}; ${pqStatus || "hybrid"}).`
: "- This trace does not claim any post-quantum proof unless the header says QUANTUM READY.",
"",
"## What this trace does not prove",
"",
@ -301,6 +351,11 @@ function renderTraceMarkdown({ shareId, job, publicBaseUrl, staticPublicBaseUrl
`- Dossier (rendered): ${dossierUrl}`,
`- Dossier (download Markdown): ${downloadUrl}`,
`- Single-file pack (review + dossier + trace): ${packUrl}`,
`- Pack (HTML view; for restrictive sandboxes): ${packHtmlUrl}`,
`- Review pack (HTML view; links-only): ${reviewHtmlUrl}`,
`- Marketing excerpt (HTML view): ${marketingHtmlUrl}`,
triageSelectorUrl ? `- Offline bundles (triage selector): ${triageSelectorUrl}` : null,
triageSelectorUrlRaw ? `- Offline bundles (raw Markdown): ${triageSelectorUrlRaw}` : null,
sourceUrl ? `- Source (PDF): ${sourceUrl}` : null,
`- This trace page: ${traceUrl}`,
mirrorBase ? "" : null,
@ -308,6 +363,7 @@ function renderTraceMarkdown({ shareId, job, publicBaseUrl, staticPublicBaseUrl
mirrorBase ? "" : null,
mirrorBase ? `- Dossier: ${mirrorBase}/static/dossier/${encodeURIComponent(shareId)}` : null,
mirrorBase ? `- Pack: ${mirrorBase}/static/pack/${encodeURIComponent(shareId)}.md` : null,
mirrorBase ? `- Pack (HTML view): ${mirrorBase}/static/pack/${encodeURIComponent(shareId)}` : null,
mirrorBase ? `- Trace: ${mirrorBase}/static/trace/${encodeURIComponent(shareId)}` : null,
mirrorBase && sourceUrl ? `- Source: ${mirrorBase}/static/source/${job.sourceSha256}${path.extname(job.sourcePath || "").toLowerCase()}` : null,
"",
@ -316,6 +372,9 @@ function renderTraceMarkdown({ shareId, job, publicBaseUrl, staticPublicBaseUrl
`- Dossier: ${directDossierUrl}`,
`- Download: ${directDownloadUrl}`,
`- Pack: ${directPackUrl}`,
`- Pack (HTML view): ${directPackHtmlUrl}`,
`- Review pack (HTML view): ${directReviewHtmlUrl}`,
`- Marketing excerpt (HTML view): ${directMarketingHtmlUrl}`,
`- Trace: ${directTraceUrl}`,
lastResortBase && lastResortBase !== directBase ? "" : null,
lastResortBase && lastResortBase !== directBase ? "## Last resort (alternate host)" : null,
@ -338,6 +397,9 @@ function renderTraceMarkdown({ shareId, job, publicBaseUrl, staticPublicBaseUrl
`- Source sha256: \`${job.sourceSha256 || ""}\``,
`- Style: \`${job.style || ""}\``,
`- Source bytes: \`${String(job.sourceBytes ?? "")}\``,
tttId ? `- Signed trace receipt ID: \`${tttId}\`` : null,
tttHash ? `- Signed trace receipt hash: \`${tttHash}\`` : null,
pqStatus ? `- PQ status: \`${pqStatus}\`` : null,
"",
"## How to verify (locally)",
"",
@ -705,6 +767,140 @@ function staticPublicBaseUrlForRequest(req, fallbackPublicBaseUrl) {
return publicBaseFromRequest(req, fallbackPublicBaseUrl);
}
function tttRegistryBaseUrl() {
const explicit = String(process.env.TTT_REGISTRY_BASE_URL || "").trim();
return explicit ? explicit.replace(/\/+$/g, "") : "";
}
function tttRegistryApiToken() {
return String(process.env.TTT_API_TOKEN || "").trim();
}
async function fetchJson(url, { method = "GET", headers, body, timeoutMs = 4500 } = {}) {
const controller = new AbortController();
const t = setTimeout(() => controller.abort(), timeoutMs);
try {
const resp = await fetch(url, { method, headers, body, signal: controller.signal });
const text = await resp.text();
let data = null;
try {
data = JSON.parse(text);
} catch {
data = { raw: text };
}
return { ok: resp.ok, status: resp.status, data };
} finally {
clearTimeout(t);
}
}
function traceReceiptRecordId(traceId) {
const id = String(traceId || "").trim();
if (!looksLikeUuid(id)) return "";
return `if://trace/${id}/v1`;
}
function buildTraceReceiptEvidence({ job, shareId, staticPublicBaseUrl }) {
const base = normalizeBaseUrl(staticPublicBaseUrl || process.env.STATIC_SOURCE_PUBLIC_BASE_URL || "https://infrafabric.io");
const sid = String(shareId || "").trim();
const traceId = String(job?.id || "").trim();
const sourceExt = String(job?.sourcePath ? path.extname(job.sourcePath) : "").toLowerCase() || ".pdf";
const sourceUrl = job?.sourceSha256 ? `${base}/static/source/${job.sourceSha256}${sourceExt}` : "";
return {
share_id: sid,
trace_id: traceId,
created_at: job?.createdAt || "",
style: job?.style || "",
source_sha256: job?.sourceSha256 || "",
output_sha256: job?.outputSha256 || "",
urls: {
pack_md: `${base}/static/pack/${encodeURIComponent(sid)}.md`,
pack_html: `${base}/static/pack/${encodeURIComponent(sid)}`,
review_html: `${base}/static/review/${encodeURIComponent(sid)}`,
marketing_html: `${base}/static/marketing/${encodeURIComponent(sid)}`,
dossier_html: `${base}/static/dossier/${encodeURIComponent(sid)}`,
dossier_md: `${base}/static/dossier/${encodeURIComponent(sid)}/download`,
trace_html: `${base}/static/trace/${encodeURIComponent(sid)}`,
source_pdf: sourceUrl,
},
};
}
function summarizeTttRecord(record) {
const rec = record && typeof record === "object" ? record : null;
if (!rec) return null;
const pqStatus = String(rec.pq_status || "").trim();
const pqAlgo = String(rec.pq_algo || "").trim();
const signaturePqPresent = Boolean(rec.signature_pq);
return {
id: String(rec.id || "").trim(),
content_hash: String(rec.content_hash || "").trim(),
signer: String(rec.signer || "").trim(),
pq_status: pqStatus,
pq_algo: pqAlgo,
pq_signature_present: signaturePqPresent,
quantum_ready: signaturePqPresent && pqStatus !== "classical-only",
};
}
async function fetchTttRecordById(recordId) {
const base = tttRegistryBaseUrl();
if (!base || !recordId) return { ok: false, status: 0, record: null };
const u = `${base}/v1/citation?id=${encodeURIComponent(recordId)}`;
try {
const { ok, status, data } = await fetchJson(u, { method: "GET" });
const verified = Boolean(data && data.verified === true);
const record = verified && data && data.record ? data.record : null;
return { ok: ok && verified, status, record };
} catch {
return { ok: false, status: 0, record: null };
}
}
async function upsertTttTraceReceipt({ job, shareId, staticPublicBaseUrl }) {
const base = tttRegistryBaseUrl();
if (!base) return { ok: false, status: 0, record: null, mode: "disabled" };
const rid = traceReceiptRecordId(job?.id);
if (!rid) return { ok: false, status: 0, record: null, mode: "invalid_trace_id" };
// Best effort: read-only GET first (no token required).
const existing = await fetchTttRecordById(rid);
if (existing.ok && existing.record) return { ok: true, status: 200, record: existing.record, mode: "found" };
const token = tttRegistryApiToken();
if (!token) return { ok: false, status: 0, record: null, mode: "no_token" };
const evidence = buildTraceReceiptEvidence({ job, shareId, staticPublicBaseUrl });
const claim = `IF.TTT trace receipt for shareId=${shareId} trace_id=${job.id}`;
const payload = {
id: rid,
claim,
evidence,
timestamp: job?.createdAt || undefined,
};
const url = `${base}/v1/citation`;
try {
const { ok, status, data } = await fetchJson(url, {
method: "POST",
headers: {
Authorization: `Bearer ${token}`,
"Content-Type": "application/json; charset=utf-8",
},
body: JSON.stringify(payload),
timeoutMs: 6500,
});
if (!ok || !data || !data.record) return { ok: false, status, record: null, mode: "create_failed" };
return { ok: true, status, record: data.record, mode: "created" };
} catch {
return { ok: false, status: 0, record: null, mode: "create_failed" };
}
}
function looksLikeUuid(value) {
return /^[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i.test(String(value || ""));
}
@ -1046,11 +1242,27 @@ function main() {
const job = readJob(jobsDir, share.jobId);
if (!job?.outputPath) return res.status(404).type("text/plain").send("Not found");
const publicBaseUrl = publicBaseFromRequest(req, "red-team.infrafabric.io");
const staticPublicBaseUrl = staticPublicBaseUrlForRequest(req, publicBaseUrl);
// Best-effort: attach a registry-signed trace receipt record so we can render
// black/white “QUANTUM READY” without over-claiming. If the registry is
// unreachable, we still render the classic hash receipt.
let tttTraceReceipt = job.tttTraceReceipt || null;
if (!tttTraceReceipt || !tttTraceReceipt.id || !tttTraceReceipt.content_hash) {
const ttt = await upsertTttTraceReceipt({ job, shareId, staticPublicBaseUrl });
if (ttt.ok && ttt.record) {
tttTraceReceipt = summarizeTttRecord(ttt.record);
if (tttTraceReceipt) {
job.tttTraceReceipt = tttTraceReceipt;
writeJob(jobsDir, job);
}
}
}
const verification = await computeVerificationStatus({ job, projectRoot, outputsDir, uploadsDir });
const jobForRender = { ...job, _verification: verification, _ttt_trace_receipt: tttTraceReceipt };
const md = renderTraceMarkdown({ shareId, job: jobForRender, publicBaseUrl, staticPublicBaseUrl });
const html = markdown.render(md);
@ -1059,7 +1271,10 @@ function main() {
`<a href="/r/${encodeURIComponent(shareId)}">Back to dossier</a>`,
`<a href="/r/${encodeURIComponent(shareId)}/download">Download Markdown</a>`,
job.sourcePath ? `<a href="/r/${encodeURIComponent(shareId)}/source">Download source</a>` : "",
`<a href="/r/${encodeURIComponent(shareId)}/review-pack">Review pack (HTML)</a>`,
`<a href="/r/${encodeURIComponent(shareId)}/review-pack.md">Review pack (MD)</a>`,
`<a href="/r/${encodeURIComponent(shareId)}/pack">Single-file pack (HTML)</a>`,
`<a href="/r/${encodeURIComponent(shareId)}/pack.md">Single-file pack (MD)</a>`,
]
.filter(Boolean)
.join(" · ");
@ -1083,6 +1298,9 @@ function main() {
res.redirect(302, staticFile.urlPath);
});
// NOTE: These routes intentionally come in pairs:
// - `*.md` is the raw, download-friendly artifact
// - same path without `.md` is the HTML view (for HTML-only sandboxes)
app.get("/r/:shareId/review-pack.md", (req, res) => {
const shareId = String(req.params.shareId || "").trim();
if (!shareId) return res.status(404).type("text/plain").send("Not found");
@ -1106,6 +1324,33 @@ function main() {
.send(md);
});
app.get("/r/:shareId/review-pack", (req, res) => {
const shareId = String(req.params.shareId || "").trim();
if (!shareId) return res.status(404).type("text/plain").send("Not found");
const share = readShare(sharesDir, shareId);
if (!share?.jobId || !looksLikeUuid(share.jobId)) return res.status(404).type("text/plain").send("Not found");
const job = readJob(jobsDir, share.jobId);
if (!job?.outputPath) return res.status(404).type("text/plain").send("Not found");
const staticSource = ensureStaticSourceFile({ job, uploadsDir, staticSourceDir, projectRoot });
const externalReviewBaseUrl = String(process.env.EXTERNAL_REVIEW_BASE_URL || "https://emo-social.infrafabric.io/external-review.html");
const externalReviewUrl = buildExternalReviewUrl(externalReviewBaseUrl, share.reviewSheetId);
const publicBaseUrl = publicBaseFromRequest(req, "red-team.infrafabric.io");
const staticPublicBaseUrl = staticPublicBaseUrlForRequest(req, publicBaseUrl);
const staticSourceUrl = staticSource ? `${staticPublicBaseUrl}${staticSource.urlPath}` : "";
const md = renderReviewPackMarkdown({ shareId, job, publicBaseUrl, externalReviewUrl, staticSourceUrl, staticPublicBaseUrl });
const html = markdown.render(md);
const topLinks = [
`<a href="/r/${encodeURIComponent(shareId)}">Back to dossier</a>`,
`<a href="/r/${encodeURIComponent(shareId)}/review-pack.md">Raw Markdown</a>`,
`<a href="/r/${encodeURIComponent(shareId)}/pack">Single-file pack (HTML)</a>`,
]
.filter(Boolean)
.join(" · ");
res.status(200).type("text/html; charset=utf-8").send(renderMarkdownPage({ title: "Review pack", html, topLinksHtml: topLinks }));
});
app.get("/r/:shareId/pack.md", (req, res) => {
const shareId = String(req.params.shareId || "").trim();
if (!shareId) return res.status(404).type("text/plain").send("Not found");
@ -1139,6 +1384,46 @@ function main() {
res.status(200).type("text/markdown; charset=utf-8").send(md);
});
app.get("/r/:shareId/pack", (req, res) => {
const shareId = String(req.params.shareId || "").trim();
if (!shareId) return res.status(404).type("text/plain").send("Not found");
const share = readShare(sharesDir, shareId);
if (!share?.jobId || !looksLikeUuid(share.jobId)) return res.status(404).type("text/plain").send("Not found");
const job = readJob(jobsDir, share.jobId);
if (!job?.outputPath) return res.status(404).type("text/plain").send("Not found");
const abs = path.resolve(projectRoot, job.outputPath);
if (!abs.startsWith(outputsDir + path.sep)) return res.status(400).type("text/plain").send("Bad path");
if (!fs.existsSync(abs)) return res.status(404).type("text/plain").send("Not found");
const dossierMarkdown = fs.readFileSync(abs, "utf8");
const staticSource = ensureStaticSourceFile({ job, uploadsDir, staticSourceDir, projectRoot });
const externalReviewBaseUrl = String(process.env.EXTERNAL_REVIEW_BASE_URL || "https://emo-social.infrafabric.io/external-review.html");
const externalReviewUrl = buildExternalReviewUrl(externalReviewBaseUrl, share.reviewSheetId);
const publicBaseUrl = publicBaseFromRequest(req, "red-team.infrafabric.io");
const staticPublicBaseUrl = staticPublicBaseUrlForRequest(req, publicBaseUrl);
const staticSourceUrl = staticSource ? `${staticPublicBaseUrl}${staticSource.urlPath}` : "";
const md = renderSingleFilePackMarkdown({
shareId,
job,
publicBaseUrl,
externalReviewUrl,
staticSourceUrl,
staticPublicBaseUrl,
dossierMarkdown,
});
const html = markdown.render(md);
const topLinks = [
`<a href="/r/${encodeURIComponent(shareId)}">Back to dossier</a>`,
`<a href="/r/${encodeURIComponent(shareId)}/pack.md">Raw Markdown</a>`,
`<a href="/r/${encodeURIComponent(shareId)}/review-pack">Review pack (HTML)</a>`,
]
.filter(Boolean)
.join(" · ");
res.status(200).type("text/html; charset=utf-8").send(renderMarkdownPage({ title: "Single-file pack", html, topLinksHtml: topLinks }));
});
app.get("/r/:shareId/marketing.md", (req, res) => {
const shareId = String(req.params.shareId || "").trim();
if (!shareId) return res.status(404).type("text/plain").send("Not found");
@ -1169,6 +1454,43 @@ function main() {
res.status(200).type("text/markdown; charset=utf-8").send(md);
});
app.get("/r/:shareId/marketing", (req, res) => {
const shareId = String(req.params.shareId || "").trim();
if (!shareId) return res.status(404).type("text/plain").send("Not found");
const share = readShare(sharesDir, shareId);
if (!share?.jobId || !looksLikeUuid(share.jobId)) return res.status(404).type("text/plain").send("Not found");
const job = readJob(jobsDir, share.jobId);
if (!job?.outputPath) return res.status(404).type("text/plain").send("Not found");
const abs = path.resolve(projectRoot, job.outputPath);
if (!abs.startsWith(outputsDir + path.sep)) return res.status(400).type("text/plain").send("Bad path");
if (!fs.existsSync(abs)) return res.status(404).type("text/plain").send("Not found");
const dossierMarkdown = fs.readFileSync(abs, "utf8");
const staticSource = ensureStaticSourceFile({ job, uploadsDir, staticSourceDir, projectRoot });
const publicBaseUrl = publicBaseFromRequest(req, "red-team.infrafabric.io");
const staticPublicBaseUrl = staticPublicBaseUrlForRequest(req, publicBaseUrl);
const staticSourceUrl = staticSource ? `${staticPublicBaseUrl}${staticSource.urlPath}` : "";
const md = renderMarketingPackMarkdown({
shareId,
job,
publicBaseUrl,
staticPublicBaseUrl,
staticSourceUrl,
dossierMarkdown,
});
const html = markdown.render(md);
const topLinks = [
`<a href="/r/${encodeURIComponent(shareId)}">Back to dossier</a>`,
`<a href="/r/${encodeURIComponent(shareId)}/marketing.md">Raw Markdown</a>`,
`<a href="/r/${encodeURIComponent(shareId)}/pack">Single-file pack (HTML)</a>`,
]
.filter(Boolean)
.join(" · ");
res.status(200).type("text/html; charset=utf-8").send(renderMarkdownPage({ title: "Marketing excerpt", html, topLinksHtml: topLinks }));
});
app.get("/r/:shareId", (req, res) => {
const shareId = String(req.params.shareId || "").trim();
if (!shareId) return res.status(404).type("text/plain").send("Not found");
@ -1189,7 +1511,11 @@ function main() {
`<a href="/r/${encodeURIComponent(shareId)}/download">Download Markdown</a>`,
job.sourcePath ? `<a href="/r/${encodeURIComponent(shareId)}/source">Download source</a>` : "",
`<a href="/r/${encodeURIComponent(shareId)}/trace">IF.TTT trace</a>`,
`<a href="/r/${encodeURIComponent(shareId)}/review-pack">Review pack (HTML)</a>`,
`<a href="/r/${encodeURIComponent(shareId)}/review-pack.md">Review pack (MD)</a>`,
`<a href="/r/${encodeURIComponent(shareId)}/pack">Single-file pack (HTML)</a>`,
`<a href="/r/${encodeURIComponent(shareId)}/pack.md">Single-file pack (MD)</a>`,
`<a href="/r/${encodeURIComponent(shareId)}/marketing">Marketing excerpt (HTML)</a>`,
`<a href="/r/${encodeURIComponent(shareId)}/marketing.md">Marketing excerpt (MD)</a>`,
externalReviewUrl ? `<a href="${escapeHtml(externalReviewUrl)}" target="_blank" rel="noreferrer">Feedback intake (login)</a>` : "",
]
@ -1286,6 +1612,20 @@ function main() {
job.warnings = warnings ? warnings.trim() : "";
job.outputSha256 = await sha256File(absOutputPath);
job.status = job.warnings ? "done_with_warnings" : "done";
// Best-effort: create a registry-signed trace receipt record (PQ-capable).
// This must never block publishing; failures degrade gracefully.
try {
const staticPublicBaseUrl = normalizeBaseUrl(process.env.STATIC_SOURCE_PUBLIC_BASE_URL || "https://infrafabric.io");
const ttt = await upsertTttTraceReceipt({ job, shareId, staticPublicBaseUrl });
if (ttt.ok && ttt.record) {
const summary = summarizeTttRecord(ttt.record);
if (summary) job.tttTraceReceipt = summary;
}
} catch {
// ignore
}
writeJob(jobsDir, job);
} catch (e) {
job.status = "error";

View file

@ -287,6 +287,42 @@ def _looks_like_navigation_heavy_source(text: str) -> bool:
return marker_hits >= 6
def _looks_like_cover_subtitle_noise(value: str) -> bool:
"""
Heuristic: cover subtitles should be "title-ish" (short, headline-like),
not body sentences, author blocks, or explanatory prose.
"""
s = " ".join((value or "").split()).strip()
if not s:
return True
# Body fragments often start mid-sentence (lowercase).
if re.match(r"^[a-z]", s):
return True
# Cover subtitles should be short; long multi-clause prose is usually body copy.
if len(s.split()) > 18:
return True
# Long prose ending with a period is rarely a subtitle for these sources.
if s.endswith(".") and len(s) > 60:
return True
low = s.lower()
if "from left to right" in low:
return True
# Author/credential blocks (common in analyst PDFs) aren't useful as subtitles.
if re.search(r"\b(cissp|ccsk|phd|research director|business value manager)\b", low):
return True
# Many commas in a long line suggests author list / affiliations.
if s.count(",") >= 3 and len(s) > 80:
return True
return False
def _extract_urls(text: str) -> list[str]:
urls: list[str] = []
for match in _URL_RE.finditer(text):
@ -627,10 +663,59 @@ def _parse_title_block(lines: list[str]) -> tuple[str, int]:
while i < len(lines) and not lines[i].strip():
i += 1
title_lines: list[str] = []
# Title blocks should be short; OCR/PDF extraction sometimes concatenates body text into the "title".
# Heuristic: keep up to a few short lines and stop before body-like lines (long sentences, URLs, etc.).
max_title_lines = 3
max_title_words = 14
max_title_chars = 110
max_total_words = 18
max_total_chars = 120
total_words = 0
total_chars = 0
while i < len(lines) and lines[i].strip():
stripped = lines[i].strip()
lower = stripped.lower()
if lower == "snyk":
i += 1
continue
# Skip common page-header noise (e.g., "… | Datasheet 1").
if "|" in stripped and "datasheet" in lower:
i += 1
continue
# If the very first non-empty line is already "body-like", synthesize a short title
# from it but keep the full line in the body (do not consume it).
word_count = len(stripped.split())
looks_body_like = (
len(stripped) > max_title_chars
or word_count > max_title_words
or bool(re.search(r"\s{3,}", stripped))
or "http://" in lower
or "https://" in lower
or (stripped.endswith(".") and word_count > 8)
)
if not title_lines and looks_body_like:
title_lines.append(_compact_title(stripped, max_chars=72))
break
# Stop title capture when we hit body-like lines.
if title_lines:
if "http://" in lower or "https://" in lower:
break
if len(stripped) > max_title_chars:
break
if word_count > max_title_words:
break
if stripped.endswith(".") and word_count > 8:
break
# Global caps: don't let multiple short lines turn into a paragraph-sized title.
if total_words + word_count > max_total_words:
break
if total_chars + len(stripped) > max_total_chars:
break
if len(title_lines) >= max_title_lines:
break
title_lines.append(stripped)
total_words += len(stripped.split())
total_chars += len(stripped)
i += 1
while i < len(lines) and not lines[i].strip():
i += 1
@ -1547,10 +1632,12 @@ def _inferred_mermaid(title: str, *, ctx: _RenderContext) -> str | None:
)
if title_upper.startswith("APPENDIX 1") or "ARCHITECTURE" in title_upper:
is_llm_context = any(k in title_upper for k in ["LLM", "MODEL", "RAG", "PROMPT"])
return ctx.pick_unique(
kind="diagram:architecture",
key=title,
variants=[
(
"""flowchart TD
A["User"] --> B["App"]
B --> C["LLM"]
@ -1559,6 +1646,16 @@ def _inferred_mermaid(title: str, *, ctx: _RenderContext) -> str | None:
D --> F["External systems"]
E --> C
"""
if is_llm_context
else """flowchart TD
A["Actor"] --> B["Workflow / system"]
B --> C["Policy decision (rules)"]
C --> D["Gate: enforce / block"]
D --> E["Evidence signals (logs)"]
E --> F["Audit / review cycle"]
F --> C
"""
)
],
used=ctx.used_diagrams,
)
@ -2035,7 +2132,7 @@ def _render_dave_factor_callout(section: _SourceSection, *, ctx: _RenderContext)
ctx=ctx,
key=section.title,
)
if "REQUEST EVIDENCE" in title_upper or _has(excerpt, "access request", "screenshot"):
if "REQUEST EVIDENCE" in title_upper or _has(excerpt, "screenshot", "attestation"):
return _daveify_callout_reframe(
"\n".join(
[
@ -2097,8 +2194,15 @@ def _render_dave_factor_callout(section: _SourceSection, *, ctx: _RenderContext)
)
if ctx.locale.lower().startswith("fr"):
anchors = _extract_numeric_anchors(section.body, limit=4)
anchor = ""
for candidate in anchors:
# Skip citation years in callouts; they read like hallucinated trivia.
if re.fullmatch(r"20\d{2}", candidate):
continue
anchor = candidate
break
anchor_hint = f" ({anchor})" if anchor else ""
variants = [
"\n".join(
[
@ -2151,8 +2255,15 @@ def _render_dave_factor_callout(section: _SourceSection, *, ctx: _RenderContext)
if not section.body.strip():
return None
anchors = _extract_numeric_anchors(excerpt, limit=4)
anchor = ""
for candidate in anchors:
# Skip citation years in callouts; they read like hallucinated trivia.
if re.fullmatch(r"20\d{2}", candidate):
continue
anchor = candidate
break
anchor_hint = f" ({anchor})" if anchor else ""
variants = [
"\n".join(
[
@ -2194,8 +2305,14 @@ def _render_punchline_closer(section: _SourceSection, *, ctx: _RenderContext) ->
if not section.body.strip():
return None
anchors = _extract_numeric_anchors(f"{section.why_it_matters or ''}\n{section.body}".strip(), limit=2)
anchor = anchors[0] if anchors else ""
anchors = _extract_numeric_anchors(f"{section.why_it_matters or ''}\n{section.body}".strip(), limit=4)
anchor = ""
for candidate in anchors:
# Avoid anchoring punchlines to random citation years unless the year is actually part of the section title.
if re.fullmatch(r"20\d{2}", candidate) and candidate not in section.title:
continue
anchor = candidate
break
anchor_hint = f" ({anchor})" if anchor else ""
if ctx.locale.lower().startswith("fr"):
@ -2321,6 +2438,17 @@ def _render_section(section: _SourceSection, *, ctx: _RenderContext) -> str:
]
)
elif title_upper.startswith("APPENDIX 1") or "ARCHITECTURE" in title_upper:
if ctx.voice == "v2.0":
variants = [
"Architecture diagrams are where optimism meets the enforcement boundary (and quietly loses).",
"Architecture diagrams are forwardable; boundaries are enforceable. Dave prefers the version you can screenshot.",
"Architecture diagrams are the happy path. The first exception request is the real design review.",
"Architecture diagrams define components; governance defines who can bypass them. Only one survives audit week.",
"Architecture diagrams are the part everyone agrees on, until we name what blocks and who owns the exception path.",
]
key = f"{section.title}:{_sha256_text(section.body)[:8]}"
paragraphs.append(ctx.pick_unique(kind="paragraph:architecture", key=key, variants=variants, used=ctx.used_paragraphs))
else:
paragraphs.extend(
[
"Architecture diagrams are where optimism goes to be audited.",
@ -2363,6 +2491,17 @@ def _render_section(section: _SourceSection, *, ctx: _RenderContext) -> str:
]
)
elif "SECURITY TEAM" in title_upper or "SECURITY REVIEW" in title_upper:
if ctx.voice == "v2.0":
variants = [
'Security team efficiency is a legitimate goal, especially when queues become the organization’s truth serum. The risk is claiming throughput without defining what “review complete” means or what evidence proves it.',
'Faster reviews are defensible; unmeasured reviews are theater. Define “complete,” define the evidence, and make drift visible before the next audit season.',
'If the security team is the bottleneck, speed matters. If speed is the metric, definitions matter: what counts as reviewed, and what signal proves it stayed reviewed?',
'Throughput improvements only count if “done” is defined. Otherwise we are measuring calendar velocity and calling it assurance.',
'Reducing review time is fine. Let’s just avoid the classic move: declare success, then argue about the definition of “review” when incidents arrive.',
]
key = f"{section.title}:{_sha256_text(section.body)[:8]}"
paragraphs.append(ctx.pick_unique(kind="paragraph:sec_team", key=key, variants=variants, used=ctx.used_paragraphs))
else:
paragraphs.extend(
[
"Security team efficiency is a legitimate goal, especially when review queues become the organizational truth serum.",
@ -2517,41 +2656,41 @@ def _render_section(section: _SourceSection, *, ctx: _RenderContext) -> str:
anchors = _extract_numeric_anchors(section.body, limit=2)
if ctx.locale.lower().startswith("fr"):
anchor_hint = f" (repères : {', '.join(anchors)})" if anchors else ""
display_title = _compact_title(section.title, max_chars=72)
variants = [
f"Nous sommes alignés sur **{section.title}** comme repère narratif{anchor_hint}, à condition de le traduire en contraintes vérifiables plutôt quen langage de confort.",
f"**{section.title}**{anchor_hint} est lendroit où la crédibilité se fabrique ; le risque « Dave » consiste à en faire une séance de ressenti plutôt quune frontière dapplication.",
f"Cette partie (**{section.title}**){anchor_hint} sera citée en réunion. Extraire un responsable de décision et une porte de contrôle, pour que ce soit exécutable, et non simplement inspirant.",
f"Dans **{section.title}**{anchor_hint}, on voit le plan devenir « compatible parties prenantes ». La contre-mesure consiste à le retraduire en responsables, échéances et critères de blocage.",
f"**{section.title}**{anchor_hint} est le sanctuaire des hypothèses. Les expliciter maintenant évite de les redécouvrir plus tard, au moment où le calendrier devient émotionnellement complexe.",
f"Nous aimons lintention de **{section.title}**{anchor_hint}. Le risque pratique : que cela devienne une diapositive ; la contre-mesure : en faire une liste de contrôle avec date de péremption.",
f"**{section.title}**{anchor_hint} promet du réalisme. Rendons-le mesurable : point de départ, écart, et un artefact de preuve qui ne nécessite pas un pèlerinage dans un dossier partagé.",
f"Voici **{section.title}**{anchor_hint} : la partie où nous sommes daccord en principe. Le geste red-team : saccorder aussi sur ce qui bloque, ce qui alerte, et qui détient lexception.",
f"Nous sommes alignés sur **{display_title}** comme repère narratif{anchor_hint}, à condition de le traduire en contraintes vérifiables plutôt quen langage de confort.",
f"Cette partie (**{display_title}**){anchor_hint} sera citée en réunion. Extraire un responsable de décision et une porte de contrôle, pour que ce soit exécutable, et non simplement inspirant.",
f"Dans **{display_title}**{anchor_hint}, on voit le plan devenir « compatible parties prenantes ». La contre-mesure consiste à le retraduire en responsables, échéances et critères de blocage.",
f"**{display_title}**{anchor_hint} est le sanctuaire des hypothèses. Les expliciter maintenant évite de les redécouvrir plus tard, au moment où le calendrier devient émotionnellement complexe.",
f"Nous aimons lintention de **{display_title}**{anchor_hint}. Le risque pratique : que cela devienne une diapositive ; la contre-mesure : en faire une liste de contrôle avec date de péremption.",
]
else:
anchor_hint = f" (notably: {', '.join(anchors)})" if anchors else ""
display_title = _compact_title(section.title, max_chars=72)
if ctx.voice == "v1.6":
variants = [
f"**{section.title}**{anchor_hint} will be quoted in meetings. Extract an owner, a gate, and a stop condition so it survives the next review cycle.",
f"Treat **{section.title}**{anchor_hint} as a control surface: define what blocks, what warns, and who owns the exception pathway.",
f"**{section.title}**{anchor_hint} reads like a plan until it meets incentives. Translate it into constraints before it turns into comfort language.",
f"In **{section.title}**{anchor_hint}, the work becomes stakeholder-safe. The counter-move is to make enforcement explicit and exceptions time-bounded.",
f"**{section.title}**{anchor_hint} is where assumptions hide. Name them now, or they will reappear later as “unexpected complexity.”",
f"**{display_title}**{anchor_hint} will be quoted in meetings. Extract an owner, a gate, and a stop condition so it survives the next review cycle.",
f"Treat **{display_title}**{anchor_hint} as a control surface: define what blocks, what warns, and who owns the exception pathway.",
f"**{display_title}**{anchor_hint} reads like a plan until it meets incentives. Translate it into constraints before it turns into comfort language.",
f"In **{display_title}**{anchor_hint}, the work becomes stakeholder-safe. The counter-move is to make enforcement explicit and exceptions time-bounded.",
f"**{display_title}**{anchor_hint} is where assumptions hide. Name them now, or they will reappear later as “unexpected complexity.”",
]
else:
variants = [
f"We are aligned on **{section.title}** as a narrative anchor{anchor_hint}, and we recommend turning it into constraints rather than comfort language.",
f"**{section.title}** is where credibility is manufactured{anchor_hint}; the Dave failure mode is to treat it as a vibe check instead of a boundary on applicability.",
f"This section (**{section.title}**){anchor_hint} will be quoted in meetings. Extract one decision owner and one gate so it becomes executable, not inspirational.",
f"In **{section.title}**{anchor_hint}, we can see the plan being translated into stakeholder-safe language. The counter-move is to translate it back into owners, deadlines, and stop conditions.",
f"**{section.title}**{anchor_hint} is the spiritual home of assumptions. Make them explicit now, because they will be rediscovered later when timelines get emotionally complex.",
f"We love the intent behind **{section.title}**{anchor_hint}. The practical risk is that it becomes a slide; the mitigation is to make it a checklist with an expiry date.",
f"**{section.title}**{anchor_hint} reads as a promise of realism. Make realism measurable: baseline, delta, and an evidence artifact that doesn't require a shared drive pilgrimage.",
f"This is **{section.title}**{anchor_hint}: the part where we agree in principle. The red-team ask is that we also agree on what blocks, what warns, and who owns the exception path.",
f"We are aligned on **{display_title}** as a narrative anchor{anchor_hint}, and we recommend turning it into constraints rather than comfort language.",
f"This section (**{display_title}**){anchor_hint} will be quoted in meetings. Extract one decision owner and one gate so it becomes executable, not inspirational.",
f"In **{display_title}**{anchor_hint}, we can see the plan being translated into stakeholder-safe language. The counter-move is to translate it back into owners, deadlines, and stop conditions.",
f"**{display_title}**{anchor_hint} is the spiritual home of assumptions. Make them explicit now, because they will be rediscovered later when timelines get emotionally complex.",
f"We love the intent behind **{display_title}**{anchor_hint}. The practical risk is that it becomes a slide; the mitigation is to make it a checklist with an expiry date.",
]
paragraphs.append(ctx.pick_unique(kind="paragraph:fallback", key=section.title, variants=variants, used=ctx.used_paragraphs))
out: list[str] = [f"## {section.title}"]
raw_title = section.title
heading_title = _compact_title(raw_title, max_chars=72) if ctx.voice == "v2.0" else raw_title
out: list[str] = [f"## {heading_title}"]
if heading_title != raw_title:
out.extend(["", f"> {raw_title}"])
if section.why_it_matters:
out.extend(["", section.why_it_matters, ""])
else:
@ -2867,6 +3006,158 @@ def _render_action_pack(sections: list[_SourceSection]) -> str:
return "\n".join(out).strip()
def _render_action_pack_v2_0(*, sections: list[_SourceSection], normalized_text: str, locale: str) -> str:
"""
IF.DAVE v2.0: reduce Action Pack boilerplate by selecting a small set of representative
sections (3–5) that cover distinct gates where possible.
"""
candidates = _action_pack_sections(sections)
if not candidates:
return ""
# Prefer breadth: pick one best section per gate (by body length), preserving first-seen gate order.
by_gate: dict[str, list[_SourceSection]] = {}
gate_order: list[str] = []
for sec in candidates:
gate = _action_pack_gate(sec)
if gate not in by_gate:
by_gate[gate] = []
gate_order.append(gate)
by_gate[gate].append(sec)
selected: list[_SourceSection] = []
for gate in gate_order:
secs = sorted(by_gate[gate], key=lambda s: len((s.body or "").strip()), reverse=True)
if secs:
selected.append(secs[0])
if len(selected) >= 5:
break
# If we have <3 distinct gates, pad with longest remaining sections (still capped at 5).
if len(selected) < 3:
def title_key(sec: _SourceSection) -> str:
return " ".join((sec.title or "").split()).strip().upper()
seen_titles = {title_key(s) for s in selected}
remaining = [s for s in candidates if s not in selected]
remaining.sort(key=lambda s: len((s.body or "").strip()), reverse=True)
for sec in remaining:
key = title_key(sec)
if key in seen_titles:
continue
selected.append(sec)
seen_titles.add(key)
if len(selected) >= 3:
break
selected = selected[:5]
out: list[str] = [
"## Action Pack (Operational)" if not locale.lower().startswith("fr") else "## Action Pack (Opérationnel)",
"",
"This appendix turns the mirror into Monday-morning work: owners, gates, stop conditions, and evidence artifacts."
if not locale.lower().startswith("fr")
else "Cet appendice transforme le miroir en travail exécutable : responsables, portes, critères de blocage, et artefacts de preuve.",
"Keep it generic and auditable; adapt to your tooling without inventing fake implementation details."
if not locale.lower().startswith("fr")
else "Restez générique et auditable ; adaptez à vos outils sans inventer de fausses implémentations.",
"Minimum telemetry schema (when you claim “verifiable signals”): event_type, emitter, freshness_window, owner."
if not locale.lower().startswith("fr")
else "Schéma minimum de télémétrie (quand vous promettez des “signaux vérifiables”) : event_type, emitter, freshness_window, owner.",
"",
"### Control Cards" if not locale.lower().startswith("fr") else "### Cartes de contrôle",
]
for sec in selected:
display_title = _compact_title(sec.title, max_chars=72)
gate = _action_pack_gate(sec)
out.extend(
[
"",
f"#### {display_title}",
"",
(
f'- **Control objective:** Turn "{display_title}" into an enforceable workflow (not a narrative).'
if not locale.lower().startswith("fr")
else f'- **Objectif de contrôle :** Rendre "{display_title}" opposable (pas seulement narratif).'
),
f"- **Gate:** {gate}" if not locale.lower().startswith("fr") else f"- **Porte :** {gate}",
f"- **Owner (RACI):** {_action_pack_owner(gate)}"
if not locale.lower().startswith("fr")
else f"- **Responsable (RACI) :** {_action_pack_owner(gate)}",
f"- **Stop condition:** {_action_pack_stop_condition(gate)}"
if not locale.lower().startswith("fr")
else f"- **Critère de blocage :** {_action_pack_stop_condition(gate)}",
f"- **Evidence artifact:** {_action_pack_evidence(gate)}"
if not locale.lower().startswith("fr")
else f"- **Artefact de preuve :** {_action_pack_evidence(gate)}",
]
)
out.extend(["", "### Backlog Export (Jira-ready)" if not locale.lower().startswith("fr") else "### Backlog (Jira-ready)", ""])
for idx, sec in enumerate(selected, 1):
gate = _action_pack_gate(sec)
display_title = _compact_title(sec.title, max_chars=72)
out.extend(
[
f"{idx}. [{gate}] {display_title}: define owner, gate, and stop condition"
if not locale.lower().startswith("fr")
else f"{idx}. [{gate}] {display_title} : définir propriétaire, porte, et critère de blocage",
(
" - Acceptance: owner assigned; stop condition documented and approved."
if not locale.lower().startswith("fr")
else " - Acceptance : responsable assigné ; critère de blocage documenté et approuvé."
),
(
" - Acceptance: evidence artifact defined and stored (machine-generated where possible)."
if not locale.lower().startswith("fr")
else " - Acceptance : artefact de preuve défini et stocké (machine-généré si possible)."
),
(
" - Acceptance: exceptions require owner + expiry; expiry is enforced automatically."
if not locale.lower().startswith("fr")
else " - Acceptance : exceptions = responsable + expiration ; expiration appliquée automatiquement."
),
]
)
out.extend(
[
"",
"### Policy-as-Code Appendix (pseudo-YAML)" if not locale.lower().startswith("fr") else "### Annexe policy-as-code (pseudo-YAML)",
"",
"```yaml",
"gates:",
" pr:",
" - name: \"risk scanning\"",
" stop_condition: \"block on high severity (or unknown)\"",
" evidence: \"scan_event_id + policy_version\"",
" access:",
" - name: \"assistant enablement\"",
" prerequisite: \"device baseline + local scan signal\"",
" stop_condition: \"deny when signals missing\"",
" evidence: \"access_grant_event + prerequisite_check\"",
" runtime:",
" - name: \"tool-use\"",
" prerequisite: \"allowlist + validation\"",
" stop_condition: \"block disallowed actions\"",
" evidence: \"execution_log_id + allowlist_version\"",
"exceptions:",
" expiry_days: 14",
" require_owner: true",
" require_reason: true",
"evidence:",
" freshness_days: 30",
" require_hash: true",
"```",
]
)
# Standards sources: translation table lives in the main body; Action Pack remains minimal and opposable.
_ = normalized_text
return "\n".join(out).strip()
def _generate_dave_v1_2_mirror(*, source_text: str, source_path: str, action_pack: bool, locale: str) -> str:
today = _dt.date.today().isoformat()
normalized = _normalize_ocr(source_text)
@ -2932,7 +3223,8 @@ def _generate_dave_v1_2_mirror(*, source_text: str, source_path: str, action_pac
f"## {cover_h1}",
]
if cover_h2:
out.extend([f"### {cover_h2}", ""])
cover_h2_out = _compact_title(cover_h2, max_chars=90) if style_version == "v2.0" else cover_h2
out.extend([f"### {cover_h2_out}", ""])
else:
out.append("")
@ -3068,8 +3360,16 @@ def _generate_dave_v1_3_mirror(*, source_text: str, source_path: str, action_pac
f"## {cover_h1}",
]
)
cover_h2_out = ""
if cover_h2:
out.extend([f"### {cover_h2}", ""])
if style_version == "v2.0":
# Avoid rendering body fragments / author blocks as a subtitle.
if not _looks_like_cover_subtitle_noise(cover_h2):
cover_h2_out = _compact_title(cover_h2, max_chars=90)
else:
cover_h2_out = cover_h2
if cover_h2_out:
out.extend([f"### {cover_h2_out}", ""])
else:
out.append("")
@ -3189,8 +3489,15 @@ def _generate_dave_v1_6_mirror(*, source_text: str, source_path: str, action_pac
f"## {cover_h1}",
]
)
cover_h2_out = ""
if cover_h2:
out.extend([f"### {cover_h2}", ""])
if style_version == "v2.0":
if not _looks_like_cover_subtitle_noise(cover_h2):
cover_h2_out = _compact_title(cover_h2, max_chars=90)
else:
cover_h2_out = cover_h2
if cover_h2_out:
out.extend([f"### {cover_h2_out}", ""])
else:
out.append("")
@ -3319,8 +3626,15 @@ def _generate_dave_v1_7_mirror(*, source_text: str, source_path: str, action_pac
f"## {cover_h1}",
]
)
cover_h2_out = ""
if cover_h2:
out.extend([f"### {cover_h2}", ""])
if style_version == "v2.0":
if not _looks_like_cover_subtitle_noise(cover_h2):
cover_h2_out = _compact_title(cover_h2, max_chars=90)
else:
cover_h2_out = cover_h2
if cover_h2_out:
out.extend([f"### {cover_h2_out}", ""])
else:
out.append("")
@ -3428,9 +3742,13 @@ def _extract_claim_lines(*, normalized_text: str, max_items: int = 12) -> list[s
return False
# Avoid internal extraction artifacts and navigation noise.
lower = s.lower()
# Avoid low-signal page headers like "… | Datasheet 1".
if "datasheet" in lower:
return False
if "trace id" in lower:
return False
if lower.startswith("http://") or lower.startswith("https://"):
# Claims register is for measurable statements, not links (URLs cause broken rendering/quoting).
if "http://" in lower or "https://" in lower:
return False
if lower in {"markdown content:", "url source:"}:
return False
@ -3442,7 +3760,18 @@ def _extract_claim_lines(*, normalized_text: str, max_items: int = 12) -> list[s
for ln in lines:
if not keep(ln):
continue
if not re.search(r"\d", ln) and "%" not in ln and "$" not in ln:
if not re.search(r"\d", ln) and "%" not in ln and "$" not in ln and "" not in ln and "£" not in ln:
continue
# Numeric guardrail: ignore lines where the only digits are untyped singletons (e.g., "Datasheet 1").
lower = ln.lower()
has_multi_digit = bool(re.search(r"\b\d{2,}\b", ln))
has_typed_singleton = bool(
re.search(
r"\b\d+\s*(?:ms|s|sec|secs|seconds|min|mins|minutes|hour|hours|day|days|x|gb|tb|mb|kb)\b",
lower,
)
)
if not ("%" in ln or "$" in ln or "" in ln or "£" in ln or has_multi_digit or has_typed_singleton):
continue
# Skip obviously broken glyph runs.
if sum(1 for ch in ln if " " <= ch <= "~") < max(8, int(len(ln) * 0.5)):
@ -3508,6 +3837,404 @@ def _render_translation_table(*, normalized_text: str, locale: str) -> str:
return "\n".join(out).strip()
def _normalize_mermaid_block(text: str) -> str:
return "\n".join([ln.rstrip() for ln in str(text).strip().splitlines() if ln.strip()])
def _diagram_label(diagram: str, *, locale: str) -> str:
d = diagram.lower()
if "pendingreview" in d or "exception" in d:
return "Stase dexception" if locale.lower().startswith("fr") else "Exception stasis"
if "screenshot" in d or "artifact" in d or "evidence" in d or "attestation" in d:
return "Boucle de dérive de preuve" if locale.lower().startswith("fr") else "Evidence drift loop"
if "policy decision point" in d or "pdp" in d or "policy enforcement point" in d or "pep" in d:
return "Chaîne PDP/PEP" if locale.lower().startswith("fr") else "PDP/PEP chain"
if "rag store" in d or ("llm" in d and "tools" in d):
return "Architecture LLM + outils" if locale.lower().startswith("fr") else "LLM toolchain architecture"
if "questionnaire" in d or "vendor" in d or "third-party" in d:
return "Boucle tiers / fournisseurs" if locale.lower().startswith("fr") else "Thirdparty loop"
return "Boucle de contrôle (inférée)" if locale.lower().startswith("fr") else "Inferred control loop"
def _apply_dave_v2_0_postprocess(md: str, *, locale: str) -> str:
"""
IF.DAVE v2.0 hardening pass:
- Limit Dave Factor callouts to 1–2 per dossier (keep highest-signal sections).
- Deduplicate Mermaid diagrams: render each unique diagram once in an Annex and reference it in-body.
"""
lines = md.splitlines()
# Identify the footer separator (last '---') so we can insert an Annex above it.
footer_sep_idx = None
in_fence = False
for idx, ln in enumerate(lines):
stripped = ln.strip()
if stripped.startswith("```"):
in_fence = not in_fence
continue
if in_fence:
continue
if stripped == "---":
footer_sep_idx = idx
if footer_sep_idx is None:
footer_sep_idx = len(lines)
# 1) Callout budget: find callout blocks and keep up to 2.
callouts: list[dict] = []
current_section = ""
i = 0
while i < len(lines):
ln = lines[i]
stripped = ln.strip()
if stripped.startswith("## "):
current_section = stripped[3:].strip()
is_callout = stripped.startswith("> **The Dave Factor:**") or stripped.startswith("> **Le facteur Dave :**")
if is_callout:
start = i
j = i
while j < len(lines) and lines[j].strip().startswith(">"):
j += 1
block_text = "\n".join(lines[start:j]).strip()
callouts.append({"start": start, "end": j, "section": current_section, "text": block_text})
i = j
continue
i += 1
def score_callout(section_title: str, text: str) -> int:
s = (section_title or "").upper()
t = (text or "").lower()
score = 0
if any(k in s for k in ["REQUEST EVIDENCE", "ACCESS REQUEST", "LOCAL SECURITY", "REQUEST EVIDENCE"]):
score += 120
if "screenshot" in t or "attestation" in t:
score += 110
if "audit" in s or "compliance" in s:
score += 95
if "training" in s or "quiz" in t or "certificate" in t:
score += 75
if any(k in t for k in ["fips", "piv", "fido", "aal"]):
score += 70
if "roadmap" in t or "alignment session" in t:
score += 25
return score
keep_callouts: set[int] = set()
if callouts:
ranked = sorted(
enumerate(callouts),
key=lambda it: (-score_callout(it[1]["section"], it[1]["text"]), it[1]["start"]),
)
seen_hashes: set[str] = set()
for idx, c in ranked:
sig_lines: list[str] = []
for ln in str(c["text"]).splitlines():
s = ln.strip()
if s.startswith("> **The Dave Factor:**") or s.startswith("> **Le facteur Dave :**"):
sig_lines.append(s)
if s.startswith("> **Countermeasure:**") or s.startswith("> **Contre-mesure :**"):
sig_lines.append(s)
if len(sig_lines) >= 2:
break
signature = "\n".join(sig_lines).strip() or str(c["text"]).strip()
h = _sha256_text(signature)
if h in seen_hashes:
continue
keep_callouts.add(idx)
seen_hashes.add(h)
if len(keep_callouts) >= 2:
break
# 2) Mermaid dedupe: collect all mermaid code fences, remove in-body blocks and replace with references.
diagrams: list[dict] = []
current_section = ""
in_other_fence = False
i = 0
while i < len(lines):
stripped = lines[i].strip()
if stripped.startswith("```") and stripped != "```mermaid":
in_other_fence = not in_other_fence
i += 1
continue
if in_other_fence:
i += 1
continue
if stripped.startswith("## "):
current_section = stripped[3:].strip()
i += 1
continue
if stripped == "```mermaid":
start = i
j = i + 1
while j < len(lines) and lines[j].strip() != "```":
j += 1
end = min(len(lines), j + 1)
raw = "\n".join(lines[start + 1 : j])
norm = _normalize_mermaid_block(raw)
if norm:
diagrams.append(
{
"start": start,
"end": end,
"section": current_section,
"raw": raw.rstrip(),
"norm": norm,
}
)
i = end
continue
i += 1
unique_diagrams: list[dict] = []
seen: set[str] = set()
for dgm in diagrams:
if dgm["norm"] in seen:
continue
seen.add(dgm["norm"])
unique_diagrams.append(dgm)
# Guarantee at least two diagrams by adding safe inferred defaults if needed.
def ensure_default_diagram(content: str) -> None:
norm = _normalize_mermaid_block(content)
if not norm or norm in seen:
return
seen.add(norm)
unique_diagrams.append({"raw": content.rstrip(), "norm": norm, "section": ""})
if len(unique_diagrams) < 2:
ensure_default_diagram(
"\n".join(
[
"flowchart TD",
' A["Control intent"] --> B["Manual evidence requested"]',
' B --> C["Artifact produced"]',
' C --> D["Dashboard goes green"]',
' D --> E["Exceptions accumulate"]',
' E --> F["Definition of compliance shifts"]',
" F --> B",
]
)
)
if len(unique_diagrams) < 2:
ensure_default_diagram(
"\n".join(
[
"stateDiagram-v2",
" [*] --> Requested",
' Requested --> PendingReview: \"needs alignment\"',
" PendingReview --> PendingReview: renewal",
" PendingReview --> Approved: silence",
' Approved --> Approved: \"temporary\" extension',
]
)
)
# Build stable diagram names.
label_counts: dict[str, int] = {}
diagram_names: dict[str, str] = {}
for dgm in unique_diagrams:
label = _diagram_label(dgm.get("raw", ""), locale=locale)
label_counts[label] = label_counts.get(label, 0) + 1
used_seq: dict[str, int] = {}
for dgm in unique_diagrams:
label = _diagram_label(dgm.get("raw", ""), locale=locale)
used_seq[label] = used_seq.get(label, 0) + 1
suffix = f" ({used_seq[label]})" if label_counts.get(label, 0) > 1 else ""
diagram_names[dgm["norm"]] = f"{label}{suffix}"
# Rebuild document: remove callouts beyond budget, remove mermaid blocks, and insert Annex before footer.
remove_ranges: list[tuple[int, int]] = []
for idx, c in enumerate(callouts):
if idx not in keep_callouts:
remove_ranges.append((c["start"], c["end"]))
for dgm in diagrams:
# Remove the optional diagram heading directly above inferred diagrams (best effort).
start = dgm["start"]
maybe_heading = start - 2
if maybe_heading >= 0:
h0 = lines[maybe_heading].strip()
h1 = lines[maybe_heading + 1].strip() if maybe_heading + 1 < len(lines) else ""
if h0.startswith("###") and "diagram" in h0.lower() and h1 == "":
start = maybe_heading
remove_ranges.append((start, dgm["end"]))
# Merge / normalize ranges.
remove_ranges.sort()
merged: list[tuple[int, int]] = []
for start, end in remove_ranges:
if not merged or start > merged[-1][1]:
merged.append((start, end))
else:
merged[-1] = (merged[-1][0], max(merged[-1][1], end))
out_lines: list[str] = []
i = 0
range_idx = 0
referenced: set[str] = set()
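# Single pass over the document: lines inside removal ranges are dropped; a range
# that contained a diagram is replaced by one "See Annex" pointer per unique diagram.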
while i < len(lines):
if range_idx < len(merged) and i == merged[range_idx][0]:
end = merged[range_idx][1]
# If this range was a diagram, replace with a reference line (based on the diagram norm if we can find it).
# Best effort: find the mermaid start inside this range.
ref = None
for dgm in diagrams:
if dgm["start"] >= merged[range_idx][0] and dgm["end"] <= merged[range_idx][1]:
name = diagram_names.get(dgm["norm"])
if name:
if dgm["norm"] not in referenced:
ref = f"See Annex: {name}."
referenced.add(dgm["norm"])
break
if ref:
out_lines.append(ref)
out_lines.append("")
i = end
range_idx += 1
continue
out_lines.append(lines[i])
i += 1
# Remove empty/legacy inferred-diagram annex headings (we insert our own).
legacy_annex_titles = {
"## Annex (inferred diagrams)",
"## Annexes (diagrammes inférés)",
}
cleaned: list[str] = []
in_legacy_annex = False
for ln in out_lines:
stripped = ln.strip()
if stripped in legacy_annex_titles:
in_legacy_annex = True
continue
if in_legacy_annex and stripped == "---":
in_legacy_annex = False
cleaned.append(ln)
continue
if in_legacy_annex and stripped.startswith("## "):
in_legacy_annex = False
if in_legacy_annex:
continue
cleaned.append(ln)
# Insert annex right before footer separator.
footer_sep_idx = None
in_fence = False
for idx, ln in enumerate(cleaned):
stripped = ln.strip()
if stripped.startswith("```"):
in_fence = not in_fence
continue
if in_fence:
continue
if stripped == "---":
footer_sep_idx = idx
if footer_sep_idx is None:
footer_sep_idx = len(cleaned)
if locale.lower().startswith("fr"):
annex_title = "## Annexes (actifs partagés)"
annex_note = "_Diagrammes dédupliqués : chaque schéma unique est rendu une fois ici ; les sections y renvoient._"
diag_title = "### Diagrammes (dédupliqués)"
else:
annex_title = "## Annex (shared assets)"
annex_note = "_Deduped diagrams: each unique diagram is rendered once here; sections reference it by name._"
diag_title = "### Diagrams (deduped)"
annex_lines = [annex_title, "", annex_note, "", diag_title, ""]
for dgm in unique_diagrams:
name = diagram_names.get(dgm["norm"]) or _diagram_label(dgm.get("raw", ""), locale=locale)
annex_lines.extend([f"#### {name}", "", "```mermaid", dgm.get("raw", "").rstrip(), "```", ""])
final_lines = cleaned[:footer_sep_idx] + [""] + annex_lines + cleaned[footer_sep_idx:]
return "\n".join([ln.rstrip() for ln in final_lines]).strip() + "\n"
def _merge_consecutive_sections_by_title(sections: list[_SourceSection]) -> list[_SourceSection]:
"""
Extraction sometimes yields many page-level "sections" with the same repeated header title.
For v2.0+ we merge consecutive equal-titled sections to prevent template repetition.
"""
if len(sections) < 3:
return sections
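# Titles are compared in a normalized form: whitespace collapsed, URLs and the
# "available free of charge from" footer fragment stripped, then upper-cased.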
def norm(title: str) -> str:
s = " ".join((title or "").split()).strip()
s = re.sub(r"https?://\\S+", "", s).strip()
s = re.sub(r"(?i)\\bthis publication is available free of charge from\\b:?.*$", "", s).strip()
s = " ".join(s.split()).strip()
return s.upper()
merged: list[_SourceSection] = [sections[0]]
for sec in sections[1:]:
if merged and norm(sec.title) == norm(merged[-1].title):
prev = merged[-1]
body = "\n\n".join([prev.body.strip(), sec.body.strip()]).strip()
why_prev = (prev.why_it_matters or "").strip()
why_new = (sec.why_it_matters or "").strip()
why = why_prev or why_new or None
if why_prev and why_new and why_new not in why_prev:
why = "\n".join([why_prev, why_new]).strip()
merged[-1] = _SourceSection(title=prev.title, body=body, why_it_matters=why)
else:
merged.append(sec)
return merged
def _merge_repeated_titles_globally(sections: list[_SourceSection], *, min_repeats: int = 3) -> list[_SourceSection]:
"""
If a title repeats many times across extracted sections, it's usually a page header.
Merge all instances into the first occurrence to avoid template repetition.
"""
if len(sections) < 3:
return sections
def norm(title: str) -> str:
s = " ".join((title or "").split()).strip()
s = re.sub(r"https?://\\S+", "", s).strip()
s = re.sub(r"(?i)\\bthis publication is available free of charge from\\b:?.*$", "", s).strip()
s = " ".join(s.split()).strip()
return s.upper()
counts: dict[str, int] = {}
for sec in sections[1:]:
key = norm(sec.title)
if not key:
continue
counts[key] = counts.get(key, 0) + 1
repeated = {k for k, n in counts.items() if n >= min_repeats}
if not repeated:
return sections
out: list[_SourceSection] = [sections[0]]
merged_by_title: dict[str, _SourceSection] = {}
out_idx_by_title: dict[str, int] = {}
for sec in sections[1:]:
key = norm(sec.title)
if key in repeated:
if key not in out_idx_by_title:
out_idx_by_title[key] = len(out)
merged_by_title[key] = sec
out.append(sec)
else:
out_idx = out_idx_by_title[key]
prev = merged_by_title[key]
body = "\n\n".join([prev.body.strip(), sec.body.strip()]).strip()
why_prev = (prev.why_it_matters or "").strip()
why_new = (sec.why_it_matters or "").strip()
why = why_prev or why_new or None
if why_prev and why_new and why_new not in why_prev:
why = "\n".join([why_prev, why_new]).strip()
merged = _SourceSection(title=prev.title, body=body, why_it_matters=why)
merged_by_title[key] = merged
out[out_idx] = merged
else:
out.append(sec)
return out
def _generate_dave_v1_8_mirror(*, source_text: str, source_path: str, action_pack: bool, locale: str, style_version: str = "v1.8") -> str:
today = _dt.date.today().isoformat()
normalized = _normalize_ocr(source_text)
@ -3519,6 +4246,9 @@ def _generate_dave_v1_8_mirror(*, source_text: str, source_path: str, action_pac
action_pack_enabled = (not _truthy_env("REVOICE_NO_ACTION_PACK")) or bool(action_pack) or _truthy_env("REVOICE_ACTION_PACK")
sections = _extract_sections(normalized)
if style_version == "v2.0":
sections = _merge_consecutive_sections_by_title(sections)
sections = _merge_repeated_titles_globally(sections)
if not sections:
raise ValueError("No content extracted from source")
if len(sections) == 1:
@ -3616,8 +4346,15 @@ def _generate_dave_v1_8_mirror(*, source_text: str, source_path: str, action_pac
f"## {cover_h1}",
]
)
cover_h2_out = ""
if cover_h2:
out.extend([f"### {cover_h2}", ""])
if style_version == "v2.0":
if not _looks_like_cover_subtitle_noise(cover_h2):
cover_h2_out = _compact_title(cover_h2, max_chars=90)
else:
cover_h2_out = cover_h2
if cover_h2_out:
out.extend([f"### {cover_h2_out}", ""])
else:
out.append("")
@ -3663,6 +4400,9 @@ def _generate_dave_v1_8_mirror(*, source_text: str, source_path: str, action_pac
out.extend([table, ""])
if action_pack_enabled:
if style_version == "v2.0":
out.append(_render_action_pack_v2_0(sections=sections[1:], normalized_text=normalized, locale=locale))
else:
out.append(_render_action_pack(sections[1:]))
out.append("")
@ -3735,4 +4475,7 @@ def _generate_dave_v1_8_mirror(*, source_text: str, source_path: str, action_pac
]
)
return "\n".join(out).strip() + "\n"
doc = "\n".join(out).strip() + "\n"
if style_version == "v2.0":
return _apply_dave_v2_0_postprocess(doc, locale=locale)
return doc

View file

@ -130,15 +130,33 @@ def _revoice_preflight(*, style: str, md_path: Path, source_path: Path) -> str:
def _extract_first_claim(md: str) -> str:
claims: list[str] = []
for line in md.splitlines():
m = re.match(r"^- The source claims: [“\"](?P<q>.+?)[”\"]\s*$", line.strip())
if m:
claims.append(m.group("q").strip())
if not claims:
return ""
def is_low_signal(claim: str) -> bool:
c = (claim or "").strip()
lower = c.lower()
if "datasheet" in lower:
return True
if "all rights reserved" in lower or "copyright" in lower:
return True
# Very short fragments tend to be headers/footers or OCR junk.
if len(c) < 40:
return True
return False
# Prefer the first non-noise claim; fall back to the first claim if all are noisy.
chosen = next((c for c in claims if not is_low_signal(c)), claims[0])
if len(chosen) > 160:
return chosen[:157].rstrip() + "…"
return chosen
def _extract_first_dave_factor(md: str) -> str:
for line in md.splitlines():
@ -177,7 +195,8 @@ def _write_marketing(
day_upper = day.day.upper()
next_label = f"{next_day.day.upper()}{next_day.edition} {next_day.hashtag}" if next_day else "Next week: new drops."
dave_factor = _extract_first_dave_factor(dossier_md) or "The control drifts into a status update, and the status update becomes the control."
claim = _extract_first_claim(dossier_md)
quote_bullet = f"- The source claims: “{claim}”" if claim else "- (Add one short measurable source quote from the dossier’s Claims Register.)"
lines = [
f"# Thread Pack — {day_upper} ({day.edition} Edition)",
@ -207,7 +226,7 @@ def _write_marketing(
"",
"## Post 3 — The Source Claims (quote-budget)",
"",
f"- The source claims: “{claim}",
quote_bullet,
"",
"## Post 4 — Sponsor Bumper (mid-thread)",
"",
@ -602,14 +621,14 @@ def main() -> int:
)
# Week index + full pack.
m = re.search(r"(v\\d+(?:\\.\\d+)*)", args.style)
m = re.search(r"(v\d+(?:\.\d+)*)", args.style)
week_title = m.group(1) if m else args.style
index_path = build_dir / "index.md"
_write_week_index(out_path=index_path, week_title=week_title, base_url=args.base_url, days=ordered, source_links=source_links)
week_pack_path = build_dir / "week.pack.md"
body_parts = [
"# InfraFabric External Review Pack — Full Week (v1.9)",
f"# InfraFabric External Review Pack — Full Week ({week_title})",
"",
"This file embeds all daily packs for sandboxed review environments. Review one day at a time.",
"",