IF.TTT: improve reviewability + thread pack extraction
Parent: bac86571a8
Commit: b644130412
4 changed files with 1206 additions and 63 deletions
AGENTS.md (41 changes)
@ -22,11 +22,52 @@ There is a sync job that mirrors `https://git.infrafabric.io/danny/hosted.git` i
**Important:** The sync uses `rsync --delete`, so anything not in the mirrored repo would normally be removed. To keep operator-generated review artifacts stable, the sync script now excludes:

- `bibles/`
- `review/`
- `iftrace.py` (operator-maintained; don’t overwrite via repo sync)

So **publish operator-generated bibles/review packs under**:

- `/srv/hosted-static/public/bibles/…`
- `/srv/hosted-static/public/review/…`
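A minimal sketch of what the sync step looks like with those exclusions (the real sync script is not reproduced here; source/destination paths and the other flags are assumptions):

```python
# Hypothetical sketch of the mirror sync with operator-artifact exclusions.
# Only the exclude list comes from the note above; paths and flags may differ.
import subprocess

EXCLUDES = ["bibles/", "review/", "iftrace.py"]  # operator-maintained; never overwritten

def sync_hosted(src: str = "hosted-mirror/", dst: str = "/srv/hosted-static/public/") -> None:
    cmd = ["rsync", "-a", "--delete"]
    for pattern in EXCLUDES:
        cmd += ["--exclude", pattern]
    subprocess.run(cmd + [src, dst], check=True)
```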
## HTML-only sandbox fallback (new; critical for external reviewers)

Some LLM “web fetchers” can load HTML but fail on `.md/.py/.tar.gz`. To keep the IF.TTT “open governance” premise intact for external review:

- Keep the raw assets (`.md`, `.tar.gz`) **and** provide an **HTML view** on the same stable alias surface.
- Share surface:
  - Raw pack: `/static/pack/<shareId>.md`
  - HTML pack view: `/static/pack/<shareId>`
  - Raw review pack: `/static/review/<shareId>.md` (alt: `/static/review-pack/<shareId>.md`)
  - HTML review pack view: `/static/review/<shareId>` (alt: `/static/review-pack/<shareId>`)
  - Raw marketing: `/static/marketing/<shareId>.md`
  - HTML marketing view: `/static/marketing/<shareId>`
- Hosted review artifacts (`/static/hosted/review/**`) also have `.html` wrappers generated post-sync.
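For restricted sandboxes, the intended client-side pattern looks roughly like this (endpoints are the aliases listed above; the exact failure mode that triggers the fallback varies by fetcher, so the exception handling here is an assumption):

```python
# Sketch: prefer the raw Markdown pack, fall back to the HTML view when the
# environment rejects the "downloadable" asset.
import urllib.error
import urllib.request

def fetch_pack(share_id: str, base: str = "https://infrafabric.io") -> tuple[str, str]:
    raw_url = f"{base}/static/pack/{share_id}.md"
    html_url = f"{base}/static/pack/{share_id}"
    try:
        with urllib.request.urlopen(raw_url, timeout=10) as resp:
            return ("markdown", resp.read().decode("utf-8"))
    except (urllib.error.HTTPError, urllib.error.URLError):
        with urllib.request.urlopen(html_url, timeout=10) as resp:
            return ("html", resp.read().decode("utf-8"))
```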
Implementation notes:
- Caddy rewrites `/static/*` (HTML view endpoints) to the red-team app (`pct 212`).
- Hosted `.html` wrappers are generated by `pct 210:/usr/local/bin/hosted_static_build_html_wrappers.py` after each sync.
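The wrapper script itself lives on `pct 210` and is not reproduced here; conceptually it does something like the following (renderer choice and output layout are assumptions, not the script’s actual contents):

```python
# Hypothetical sketch of the post-sync wrapper pass: render one .html next to
# each hosted .md so HTML-only fetchers can read review artifacts.
from pathlib import Path
from markdown_it import MarkdownIt  # assumption: any Markdown renderer would do

def build_wrappers(root: str = "/srv/hosted-static/public/review") -> None:
    md = MarkdownIt()
    for src in Path(root).rglob("*.md"):
        body = md.render(src.read_text(encoding="utf-8"))
        src.with_suffix(".html").write_text(
            f"<!doctype html><meta charset='utf-8'><title>{src.name}</title>{body}",
            encoding="utf-8",
        )
```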
## Full stack + links (operator reference)

- `/root/docs/19-ifttt-full-stack-and-working-links.md` is the “single page” reference for:
  - Which apps run where (pct IDs + IPs)
  - Which URLs are canonical for sharing
  - Copy/paste-safe example links
- IF.TTT public overview page (hosted-static): https://infrafabric.io/static/hosted/ifttt/
## IF.TTT paper update review pack (known-good example)

Use this pack when requesting external critique of the IF.TTT paper update (receipt-first chronology + public receipts + triage bundles):

- Landing: `https://infrafabric.io/static/hosted/review/ifttt-paper-update/2025-12-28/`
- Pack (MD): `https://infrafabric.io/static/hosted/review/ifttt-paper-update/2025-12-28/review-pack.md`
- Pack (HTML): `https://infrafabric.io/static/hosted/review/ifttt-paper-update/2025-12-28/review-pack.html`
- Pack (tar.gz): `https://infrafabric.io/static/hosted/review/ifttt-paper-update/2025-12-28/review-pack.tar.gz`
- Pack hash: `https://infrafabric.io/static/hosted/review/ifttt-paper-update/2025-12-28/review-pack.tar.gz.sha256`
- Triage selector demo (canonical): `https://infrafabric.io/static/hosted/review/trace-bundles/d70ed99a/index.md`
- Offline verifier: `https://infrafabric.io/static/hosted/iftrace.py`

Note: some LLM “web fetchers” reject `.tar.gz` with a client-side `415` even when browsers/curl succeed; use the `.html` pack in those environments.
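To check the pack hash locally without `iftrace.py` (assuming the published `.sha256` file follows the usual `<hex>  <filename>` layout):

```python
# Verify the review-pack tarball against its published sha256.
import hashlib
import urllib.request

BASE = "https://infrafabric.io/static/hosted/review/ifttt-paper-update/2025-12-28"

def pack_hash_matches() -> bool:
    data = urllib.request.urlopen(f"{BASE}/review-pack.tar.gz", timeout=30).read()
    published = urllib.request.urlopen(f"{BASE}/review-pack.tar.gz.sha256", timeout=30).read()
    expected = published.decode("utf-8").split()[0].lower()
    return hashlib.sha256(data).hexdigest() == expected
```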
## Week review packs (v1.8)

Week v1.8 packs are published here:
@ -8,6 +8,24 @@ import MarkdownIt from "markdown-it";
import express from "express";
import multer from "multer";

/*
Public, no-login receipt surface (IF.TTT)
----------------------------------------
This server exposes Shadow Dossiers and their "receipt" artifacts via two parallel
representations:

- Raw (download-friendly): `*.md` (and tarballs elsewhere)
- HTML views: same path without the `.md` suffix

Rationale: some external review environments (including certain LLM "web fetchers")
reliably load `text/html` but may reject "downloadable" assets like `.md/.py/.tar.gz`.
Keeping both surfaces makes the governance/receipt premise reviewable by humans *and*
restricted sandboxes.

Deployment detail: the stable public aliases live under `/static/*` on the public
domain and are reverse-proxied here (see operator docs: `/root/docs/17-ifttt-public-receipt-surface.md`).
*/

const __filename = url.fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
@ -145,6 +163,7 @@ function renderTraceHeaderHtml({ verification, job }) {
|
|||
const createdAt = job?.createdAt ? String(job.createdAt) : "";
|
||||
const traceId = String(job?.id || "");
|
||||
const style = String(job?.style || "");
|
||||
const ttt = job?._ttt_trace_receipt || null;
|
||||
|
||||
const checks = verification?.checks || {};
|
||||
const outputOk = checks.outputOk === true;
|
||||
|
|
@ -154,6 +173,11 @@ function renderTraceHeaderHtml({ verification, job }) {
|
|||
const outputLabel = outputOk ? "PASS" : "FAIL";
|
||||
const sourceLabel = sourceOk === true ? "PASS" : sourceOk === false ? "FAIL" : "UNKNOWN";
|
||||
|
||||
const quantumReady = Boolean(ttt && ttt.quantum_ready === true);
|
||||
const pqAlgo = ttt && ttt.pq_algo ? String(ttt.pq_algo) : "";
|
||||
const pqStatus = ttt && ttt.pq_status ? String(ttt.pq_status) : "";
|
||||
const pqLabel = quantumReady ? `READY${pqAlgo ? ` (${pqAlgo})` : ""}` : ttt ? "ABSENT" : "UNKNOWN";
|
||||
|
||||
const safeCreatedAt = createdAt ? escapeHtml(createdAt) : "";
|
||||
const safeTraceId = traceId ? escapeHtml(traceId) : "";
|
||||
const safeStyle = style ? escapeHtml(style) : "";
|
||||
|
|
@ -171,6 +195,7 @@ function renderTraceHeaderHtml({ verification, job }) {
|
|||
` <ul class="trace-checks">`,
|
||||
` <li>Output hash check: <strong>${escapeHtml(outputLabel)}</strong></li>`,
|
||||
` <li>Source hash check: <strong>${escapeHtml(sourceLabel)}</strong></li>`,
|
||||
` <li>Quantum-ready receipt: <strong>${escapeHtml(pqLabel)}</strong>${pqStatus ? ` <small>(${escapeHtml(pqStatus)})</small>` : ""}</li>`,
|
||||
` <li>Quality warnings: <strong>${warningsPresent ? "present" : "none recorded"}</strong></li>`,
|
||||
` </ul>`,
|
||||
`</div>`,
|
||||
|
|
@ -258,11 +283,30 @@ function renderTraceMarkdown({ shareId, job, publicBaseUrl, staticPublicBaseUrl
|
|||
const createdAt = job?.createdAt ? String(job.createdAt) : "";
|
||||
const status = job?.status ? String(job.status) : "";
|
||||
const warningsPresent = Boolean(job?.warnings && String(job.warnings).trim());
|
||||
const ttt = job?._ttt_trace_receipt || job?.tttTraceReceipt || null;
|
||||
const tttId = ttt && ttt.id ? String(ttt.id) : "";
|
||||
const tttHash = ttt && ttt.content_hash ? String(ttt.content_hash) : "";
|
||||
const pqReady = Boolean(ttt && ttt.quantum_ready === true);
|
||||
const pqAlgo = ttt && ttt.pq_algo ? String(ttt.pq_algo) : "";
|
||||
const pqStatus = ttt && ttt.pq_status ? String(ttt.pq_status) : "";
|
||||
|
||||
const traceId = String(job?.id || "").trim();
|
||||
const tracePrefixRaw = traceId ? traceId.split("-")[0] : "";
|
||||
const tracePrefix = /^[0-9a-f]{8}$/i.test(tracePrefixRaw) ? tracePrefixRaw.toLowerCase() : "";
|
||||
const triageSelectorUrl = tracePrefix
|
||||
? `${primaryBase}/static/hosted/review/trace-bundles/${encodeURIComponent(tracePrefix)}/index.html`
|
||||
: "";
|
||||
const triageSelectorUrlRaw = tracePrefix
|
||||
? `${primaryBase}/static/hosted/review/trace-bundles/${encodeURIComponent(tracePrefix)}/index.md`
|
||||
: "";
|
||||
|
||||
const dossierUrl = `${primaryBase}/static/dossier/${encodeURIComponent(shareId)}`;
|
||||
const traceUrl = `${primaryBase}/static/trace/${encodeURIComponent(shareId)}`;
|
||||
const downloadUrl = `${primaryBase}/static/dossier/${encodeURIComponent(shareId)}/download`;
|
||||
const packUrl = `${primaryBase}/static/pack/${encodeURIComponent(shareId)}.md`;
|
||||
const packHtmlUrl = `${primaryBase}/static/pack/${encodeURIComponent(shareId)}`;
|
||||
const reviewHtmlUrl = `${primaryBase}/static/review/${encodeURIComponent(shareId)}`;
|
||||
const marketingHtmlUrl = `${primaryBase}/static/marketing/${encodeURIComponent(shareId)}`;
|
||||
const sourceUrl = job?.sourceSha256
|
||||
? `${primaryBase}/static/source/${job.sourceSha256}${path.extname(job.sourcePath || "").toLowerCase()}`
|
||||
: "";
|
||||
|
|
@ -272,6 +316,9 @@ function renderTraceMarkdown({ shareId, job, publicBaseUrl, staticPublicBaseUrl
|
|||
const directTraceUrl = `${directBase}/r/${encodeURIComponent(shareId)}/trace`;
|
||||
const directDownloadUrl = `${directBase}/r/${encodeURIComponent(shareId)}/download`;
|
||||
const directPackUrl = `${directBase}/r/${encodeURIComponent(shareId)}/pack.md`;
|
||||
const directPackHtmlUrl = `${directBase}/r/${encodeURIComponent(shareId)}/pack`;
|
||||
const directReviewHtmlUrl = `${directBase}/r/${encodeURIComponent(shareId)}/review-pack`;
|
||||
const directMarketingHtmlUrl = `${directBase}/r/${encodeURIComponent(shareId)}/marketing`;
|
||||
|
||||
const lastResortBase = normalizeBaseUrl(publicBaseUrl);
|
||||
const lastResortDossierUrl = lastResortBase ? `${lastResortBase}/r/${encodeURIComponent(shareId)}` : "";
|
||||
|
|
@ -289,6 +336,9 @@ function renderTraceMarkdown({ shareId, job, publicBaseUrl, staticPublicBaseUrl
|
|||
"- You can independently verify the downloaded dossier Markdown by hashing it and comparing to `Output sha256` below.",
|
||||
"- You can independently verify the hosted source file (if present) by hashing it and comparing to `Source sha256` below.",
|
||||
"- This page binds those two fingerprints together as a single public evidence record.",
|
||||
pqReady
|
||||
? `- This trace also has a **Quantum-ready** signed receipt record (${pqAlgo || "PQ"}; ${pqStatus || "hybrid"}).`
|
||||
: "- This trace does not claim any post-quantum proof unless the header says QUANTUM READY.",
|
||||
"",
|
||||
"## What this trace does not prove",
|
||||
"",
|
||||
|
|
@ -301,6 +351,11 @@ function renderTraceMarkdown({ shareId, job, publicBaseUrl, staticPublicBaseUrl
|
|||
`- Dossier (rendered): ${dossierUrl}`,
|
||||
`- Dossier (download Markdown): ${downloadUrl}`,
|
||||
`- Single-file pack (review + dossier + trace): ${packUrl}`,
|
||||
`- Pack (HTML view; for restrictive sandboxes): ${packHtmlUrl}`,
|
||||
`- Review pack (HTML view; links-only): ${reviewHtmlUrl}`,
|
||||
`- Marketing excerpt (HTML view): ${marketingHtmlUrl}`,
|
||||
triageSelectorUrl ? `- Offline bundles (triage selector): ${triageSelectorUrl}` : null,
|
||||
triageSelectorUrlRaw ? `- Offline bundles (raw Markdown): ${triageSelectorUrlRaw}` : null,
|
||||
sourceUrl ? `- Source (PDF): ${sourceUrl}` : null,
|
||||
`- This trace page: ${traceUrl}`,
|
||||
mirrorBase ? "" : null,
|
||||
|
|
@ -308,6 +363,7 @@ function renderTraceMarkdown({ shareId, job, publicBaseUrl, staticPublicBaseUrl
|
|||
mirrorBase ? "" : null,
|
||||
mirrorBase ? `- Dossier: ${mirrorBase}/static/dossier/${encodeURIComponent(shareId)}` : null,
|
||||
mirrorBase ? `- Pack: ${mirrorBase}/static/pack/${encodeURIComponent(shareId)}.md` : null,
|
||||
mirrorBase ? `- Pack (HTML view): ${mirrorBase}/static/pack/${encodeURIComponent(shareId)}` : null,
|
||||
mirrorBase ? `- Trace: ${mirrorBase}/static/trace/${encodeURIComponent(shareId)}` : null,
|
||||
mirrorBase && sourceUrl ? `- Source: ${mirrorBase}/static/source/${job.sourceSha256}${path.extname(job.sourcePath || "").toLowerCase()}` : null,
|
||||
"",
|
||||
|
|
@ -316,6 +372,9 @@ function renderTraceMarkdown({ shareId, job, publicBaseUrl, staticPublicBaseUrl
|
|||
`- Dossier: ${directDossierUrl}`,
|
||||
`- Download: ${directDownloadUrl}`,
|
||||
`- Pack: ${directPackUrl}`,
|
||||
`- Pack (HTML view): ${directPackHtmlUrl}`,
|
||||
`- Review pack (HTML view): ${directReviewHtmlUrl}`,
|
||||
`- Marketing excerpt (HTML view): ${directMarketingHtmlUrl}`,
|
||||
`- Trace: ${directTraceUrl}`,
|
||||
lastResortBase && lastResortBase !== directBase ? "" : null,
|
||||
lastResortBase && lastResortBase !== directBase ? "## Last resort (alternate host)" : null,
|
||||
|
|
@ -338,6 +397,9 @@ function renderTraceMarkdown({ shareId, job, publicBaseUrl, staticPublicBaseUrl
|
|||
`- Source sha256: \`${job.sourceSha256 || ""}\``,
|
||||
`- Style: \`${job.style || ""}\``,
|
||||
`- Source bytes: \`${String(job.sourceBytes ?? "")}\``,
|
||||
tttId ? `- Signed trace receipt ID: \`${tttId}\`` : null,
|
||||
tttHash ? `- Signed trace receipt hash: \`${tttHash}\`` : null,
|
||||
pqStatus ? `- PQ status: \`${pqStatus}\`` : null,
|
||||
"",
|
||||
"## How to verify (locally)",
|
||||
"",
|
||||
|
|
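For reference, the “How to verify (locally)” guidance rendered above reduces to a hash comparison; a minimal sketch (the local file name and argument handling are assumptions):

```python
# Hash the downloaded dossier Markdown and compare it to the `Output sha256`
# value shown on the trace page.
import hashlib

def dossier_matches(path: str, output_sha256: str) -> bool:
    with open(path, "rb") as fh:
        return hashlib.sha256(fh.read()).hexdigest() == output_sha256.strip().lower()
```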
@ -705,6 +767,140 @@ function staticPublicBaseUrlForRequest(req, fallbackPublicBaseUrl) {
|
|||
return publicBaseFromRequest(req, fallbackPublicBaseUrl);
|
||||
}
|
||||
|
||||
function tttRegistryBaseUrl() {
|
||||
const explicit = String(process.env.TTT_REGISTRY_BASE_URL || "").trim();
|
||||
return explicit ? explicit.replace(/\/+$/g, "") : "";
|
||||
}
|
||||
|
||||
function tttRegistryApiToken() {
|
||||
return String(process.env.TTT_API_TOKEN || "").trim();
|
||||
}
|
||||
|
||||
async function fetchJson(url, { method = "GET", headers, body, timeoutMs = 4500 } = {}) {
|
||||
const controller = new AbortController();
|
||||
const t = setTimeout(() => controller.abort(), timeoutMs);
|
||||
try {
|
||||
const resp = await fetch(url, { method, headers, body, signal: controller.signal });
|
||||
const text = await resp.text();
|
||||
let data = null;
|
||||
try {
|
||||
data = JSON.parse(text);
|
||||
} catch {
|
||||
data = { raw: text };
|
||||
}
|
||||
return { ok: resp.ok, status: resp.status, data };
|
||||
} finally {
|
||||
clearTimeout(t);
|
||||
}
|
||||
}
|
||||
|
||||
function traceReceiptRecordId(traceId) {
|
||||
const id = String(traceId || "").trim();
|
||||
if (!looksLikeUuid(id)) return "";
|
||||
return `if://trace/${id}/v1`;
|
||||
}
|
||||
|
||||
function buildTraceReceiptEvidence({ job, shareId, staticPublicBaseUrl }) {
|
||||
const base = normalizeBaseUrl(staticPublicBaseUrl || process.env.STATIC_SOURCE_PUBLIC_BASE_URL || "https://infrafabric.io");
|
||||
|
||||
const sid = String(shareId || "").trim();
|
||||
const traceId = String(job?.id || "").trim();
|
||||
|
||||
const sourceExt = String(job?.sourcePath ? path.extname(job.sourcePath) : "").toLowerCase() || ".pdf";
|
||||
const sourceUrl = job?.sourceSha256 ? `${base}/static/source/${job.sourceSha256}${sourceExt}` : "";
|
||||
|
||||
return {
|
||||
share_id: sid,
|
||||
trace_id: traceId,
|
||||
created_at: job?.createdAt || "",
|
||||
style: job?.style || "",
|
||||
source_sha256: job?.sourceSha256 || "",
|
||||
output_sha256: job?.outputSha256 || "",
|
||||
urls: {
|
||||
pack_md: `${base}/static/pack/${encodeURIComponent(sid)}.md`,
|
||||
pack_html: `${base}/static/pack/${encodeURIComponent(sid)}`,
|
||||
review_html: `${base}/static/review/${encodeURIComponent(sid)}`,
|
||||
marketing_html: `${base}/static/marketing/${encodeURIComponent(sid)}`,
|
||||
dossier_html: `${base}/static/dossier/${encodeURIComponent(sid)}`,
|
||||
dossier_md: `${base}/static/dossier/${encodeURIComponent(sid)}/download`,
|
||||
trace_html: `${base}/static/trace/${encodeURIComponent(sid)}`,
|
||||
source_pdf: sourceUrl,
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
function summarizeTttRecord(record) {
|
||||
const rec = record && typeof record === "object" ? record : null;
|
||||
if (!rec) return null;
|
||||
const pqStatus = String(rec.pq_status || "").trim();
|
||||
const pqAlgo = String(rec.pq_algo || "").trim();
|
||||
const signaturePqPresent = Boolean(rec.signature_pq);
|
||||
return {
|
||||
id: String(rec.id || "").trim(),
|
||||
content_hash: String(rec.content_hash || "").trim(),
|
||||
signer: String(rec.signer || "").trim(),
|
||||
pq_status: pqStatus,
|
||||
pq_algo: pqAlgo,
|
||||
pq_signature_present: signaturePqPresent,
|
||||
quantum_ready: signaturePqPresent && pqStatus !== "classical-only",
|
||||
};
|
||||
}
|
||||
|
||||
async function fetchTttRecordById(recordId) {
|
||||
const base = tttRegistryBaseUrl();
|
||||
if (!base || !recordId) return { ok: false, status: 0, record: null };
|
||||
const u = `${base}/v1/citation?id=${encodeURIComponent(recordId)}`;
|
||||
try {
|
||||
const { ok, status, data } = await fetchJson(u, { method: "GET" });
|
||||
const verified = Boolean(data && data.verified === true);
|
||||
const record = verified && data && data.record ? data.record : null;
|
||||
return { ok: ok && verified, status, record };
|
||||
} catch {
|
||||
return { ok: false, status: 0, record: null };
|
||||
}
|
||||
}
|
||||
|
||||
async function upsertTttTraceReceipt({ job, shareId, staticPublicBaseUrl }) {
|
||||
const base = tttRegistryBaseUrl();
|
||||
if (!base) return { ok: false, status: 0, record: null, mode: "disabled" };
|
||||
|
||||
const rid = traceReceiptRecordId(job?.id);
|
||||
if (!rid) return { ok: false, status: 0, record: null, mode: "invalid_trace_id" };
|
||||
|
||||
// Best effort: read-only GET first (no token required).
|
||||
const existing = await fetchTttRecordById(rid);
|
||||
if (existing.ok && existing.record) return { ok: true, status: 200, record: existing.record, mode: "found" };
|
||||
|
||||
const token = tttRegistryApiToken();
|
||||
if (!token) return { ok: false, status: 0, record: null, mode: "no_token" };
|
||||
|
||||
const evidence = buildTraceReceiptEvidence({ job, shareId, staticPublicBaseUrl });
|
||||
const claim = `IF.TTT trace receipt for shareId=${shareId} trace_id=${job.id}`;
|
||||
const payload = {
|
||||
id: rid,
|
||||
claim,
|
||||
evidence,
|
||||
timestamp: job?.createdAt || undefined,
|
||||
};
|
||||
|
||||
const url = `${base}/v1/citation`;
|
||||
try {
|
||||
const { ok, status, data } = await fetchJson(url, {
|
||||
method: "POST",
|
||||
headers: {
|
||||
Authorization: `Bearer ${token}`,
|
||||
"Content-Type": "application/json; charset=utf-8",
|
||||
},
|
||||
body: JSON.stringify(payload),
|
||||
timeoutMs: 6500,
|
||||
});
|
||||
if (!ok || !data || !data.record) return { ok: false, status, record: null, mode: "create_failed" };
|
||||
return { ok: true, status, record: data.record, mode: "created" };
|
||||
} catch {
|
||||
return { ok: false, status: 0, record: null, mode: "create_failed" };
|
||||
}
|
||||
}
|
||||
|
||||
function looksLikeUuid(value) {
|
||||
return /^[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$/i.test(String(value || ""));
|
||||
}
|
||||
|
|
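The read-only lookup above maps to a plain GET against the registry; a minimal client sketch (only the `/v1/citation?id=…` shape, the `if://trace/<id>/v1` record id, and the `verified`/`record` response fields come from the code above; everything else is an assumption):

```python
# Read-only check of a registry-signed trace receipt (no token required).
import json
import urllib.parse
import urllib.request

def fetch_ttt_record(base_url: str, trace_id: str) -> dict | None:
    record_id = f"if://trace/{trace_id}/v1"
    url = f"{base_url.rstrip('/')}/v1/citation?id={urllib.parse.quote(record_id, safe='')}"
    with urllib.request.urlopen(url, timeout=5) as resp:
        data = json.loads(resp.read().decode("utf-8"))
    return data.get("record") if data.get("verified") is True else None
```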
@ -1046,11 +1242,27 @@ function main() {
|
|||
const job = readJob(jobsDir, share.jobId);
|
||||
if (!job?.outputPath) return res.status(404).type("text/plain").send("Not found");
|
||||
|
||||
const verification = await computeVerificationStatus({ job, projectRoot, outputsDir, uploadsDir });
|
||||
const jobForRender = { ...job, _verification: verification };
|
||||
|
||||
const publicBaseUrl = publicBaseFromRequest(req, "red-team.infrafabric.io");
|
||||
const staticPublicBaseUrl = staticPublicBaseUrlForRequest(req, publicBaseUrl);
|
||||
|
||||
// Best-effort: attach a registry-signed trace receipt record so we can render
|
||||
// black/white “QUANTUM READY” without over-claiming. If the registry is
|
||||
// unreachable, we still render the classic hash receipt.
|
||||
let tttTraceReceipt = job.tttTraceReceipt || null;
|
||||
if (!tttTraceReceipt || !tttTraceReceipt.id || !tttTraceReceipt.content_hash) {
|
||||
const ttt = await upsertTttTraceReceipt({ job, shareId, staticPublicBaseUrl });
|
||||
if (ttt.ok && ttt.record) {
|
||||
tttTraceReceipt = summarizeTttRecord(ttt.record);
|
||||
if (tttTraceReceipt) {
|
||||
job.tttTraceReceipt = tttTraceReceipt;
|
||||
writeJob(jobsDir, job);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const verification = await computeVerificationStatus({ job, projectRoot, outputsDir, uploadsDir });
|
||||
const jobForRender = { ...job, _verification: verification, _ttt_trace_receipt: tttTraceReceipt };
|
||||
|
||||
const md = renderTraceMarkdown({ shareId, job: jobForRender, publicBaseUrl, staticPublicBaseUrl });
|
||||
const html = markdown.render(md);
|
||||
|
||||
|
|
@ -1059,7 +1271,10 @@ function main() {
|
|||
`<a href="/r/${encodeURIComponent(shareId)}">Back to dossier</a>`,
|
||||
`<a href="/r/${encodeURIComponent(shareId)}/download">Download Markdown</a>`,
|
||||
job.sourcePath ? `<a href="/r/${encodeURIComponent(shareId)}/source">Download source</a>` : "",
|
||||
`<a href="/r/${encodeURIComponent(shareId)}/review-pack">Review pack (HTML)</a>`,
|
||||
`<a href="/r/${encodeURIComponent(shareId)}/review-pack.md">Review pack (MD)</a>`,
|
||||
`<a href="/r/${encodeURIComponent(shareId)}/pack">Single-file pack (HTML)</a>`,
|
||||
`<a href="/r/${encodeURIComponent(shareId)}/pack.md">Single-file pack (MD)</a>`,
|
||||
]
|
||||
.filter(Boolean)
|
||||
.join(" · ");
|
||||
|
|
@ -1083,6 +1298,9 @@ function main() {
|
|||
res.redirect(302, staticFile.urlPath);
|
||||
});
|
||||
|
||||
// NOTE: These routes intentionally come in pairs:
|
||||
// - `*.md` is the raw, download-friendly artifact
|
||||
// - same path without `.md` is the HTML view (for HTML-only sandboxes)
|
||||
app.get("/r/:shareId/review-pack.md", (req, res) => {
|
||||
const shareId = String(req.params.shareId || "").trim();
|
||||
if (!shareId) return res.status(404).type("text/plain").send("Not found");
|
||||
|
|
@ -1106,6 +1324,33 @@ function main() {
|
|||
.send(md);
|
||||
});
|
||||
|
||||
app.get("/r/:shareId/review-pack", (req, res) => {
|
||||
const shareId = String(req.params.shareId || "").trim();
|
||||
if (!shareId) return res.status(404).type("text/plain").send("Not found");
|
||||
const share = readShare(sharesDir, shareId);
|
||||
if (!share?.jobId || !looksLikeUuid(share.jobId)) return res.status(404).type("text/plain").send("Not found");
|
||||
const job = readJob(jobsDir, share.jobId);
|
||||
if (!job?.outputPath) return res.status(404).type("text/plain").send("Not found");
|
||||
|
||||
const staticSource = ensureStaticSourceFile({ job, uploadsDir, staticSourceDir, projectRoot });
|
||||
const externalReviewBaseUrl = String(process.env.EXTERNAL_REVIEW_BASE_URL || "https://emo-social.infrafabric.io/external-review.html");
|
||||
const externalReviewUrl = buildExternalReviewUrl(externalReviewBaseUrl, share.reviewSheetId);
|
||||
const publicBaseUrl = publicBaseFromRequest(req, "red-team.infrafabric.io");
|
||||
const staticPublicBaseUrl = staticPublicBaseUrlForRequest(req, publicBaseUrl);
|
||||
const staticSourceUrl = staticSource ? `${staticPublicBaseUrl}${staticSource.urlPath}` : "";
|
||||
|
||||
const md = renderReviewPackMarkdown({ shareId, job, publicBaseUrl, externalReviewUrl, staticSourceUrl, staticPublicBaseUrl });
|
||||
const html = markdown.render(md);
|
||||
const topLinks = [
|
||||
`<a href="/r/${encodeURIComponent(shareId)}">Back to dossier</a>`,
|
||||
`<a href="/r/${encodeURIComponent(shareId)}/review-pack.md">Raw Markdown</a>`,
|
||||
`<a href="/r/${encodeURIComponent(shareId)}/pack">Single-file pack (HTML)</a>`,
|
||||
]
|
||||
.filter(Boolean)
|
||||
.join(" · ");
|
||||
res.status(200).type("text/html; charset=utf-8").send(renderMarkdownPage({ title: "Review pack", html, topLinksHtml: topLinks }));
|
||||
});
|
||||
|
||||
app.get("/r/:shareId/pack.md", (req, res) => {
|
||||
const shareId = String(req.params.shareId || "").trim();
|
||||
if (!shareId) return res.status(404).type("text/plain").send("Not found");
|
||||
|
|
@ -1139,6 +1384,46 @@ function main() {
|
|||
res.status(200).type("text/markdown; charset=utf-8").send(md);
|
||||
});
|
||||
|
||||
app.get("/r/:shareId/pack", (req, res) => {
|
||||
const shareId = String(req.params.shareId || "").trim();
|
||||
if (!shareId) return res.status(404).type("text/plain").send("Not found");
|
||||
const share = readShare(sharesDir, shareId);
|
||||
if (!share?.jobId || !looksLikeUuid(share.jobId)) return res.status(404).type("text/plain").send("Not found");
|
||||
const job = readJob(jobsDir, share.jobId);
|
||||
if (!job?.outputPath) return res.status(404).type("text/plain").send("Not found");
|
||||
|
||||
const abs = path.resolve(projectRoot, job.outputPath);
|
||||
if (!abs.startsWith(outputsDir + path.sep)) return res.status(400).type("text/plain").send("Bad path");
|
||||
if (!fs.existsSync(abs)) return res.status(404).type("text/plain").send("Not found");
|
||||
const dossierMarkdown = fs.readFileSync(abs, "utf8");
|
||||
|
||||
const staticSource = ensureStaticSourceFile({ job, uploadsDir, staticSourceDir, projectRoot });
|
||||
const externalReviewBaseUrl = String(process.env.EXTERNAL_REVIEW_BASE_URL || "https://emo-social.infrafabric.io/external-review.html");
|
||||
const externalReviewUrl = buildExternalReviewUrl(externalReviewBaseUrl, share.reviewSheetId);
|
||||
const publicBaseUrl = publicBaseFromRequest(req, "red-team.infrafabric.io");
|
||||
const staticPublicBaseUrl = staticPublicBaseUrlForRequest(req, publicBaseUrl);
|
||||
const staticSourceUrl = staticSource ? `${staticPublicBaseUrl}${staticSource.urlPath}` : "";
|
||||
|
||||
const md = renderSingleFilePackMarkdown({
|
||||
shareId,
|
||||
job,
|
||||
publicBaseUrl,
|
||||
externalReviewUrl,
|
||||
staticSourceUrl,
|
||||
staticPublicBaseUrl,
|
||||
dossierMarkdown,
|
||||
});
|
||||
const html = markdown.render(md);
|
||||
const topLinks = [
|
||||
`<a href="/r/${encodeURIComponent(shareId)}">Back to dossier</a>`,
|
||||
`<a href="/r/${encodeURIComponent(shareId)}/pack.md">Raw Markdown</a>`,
|
||||
`<a href="/r/${encodeURIComponent(shareId)}/review-pack">Review pack (HTML)</a>`,
|
||||
]
|
||||
.filter(Boolean)
|
||||
.join(" · ");
|
||||
res.status(200).type("text/html; charset=utf-8").send(renderMarkdownPage({ title: "Single-file pack", html, topLinksHtml: topLinks }));
|
||||
});
|
||||
|
||||
app.get("/r/:shareId/marketing.md", (req, res) => {
|
||||
const shareId = String(req.params.shareId || "").trim();
|
||||
if (!shareId) return res.status(404).type("text/plain").send("Not found");
|
||||
|
|
@ -1169,6 +1454,43 @@ function main() {
|
|||
res.status(200).type("text/markdown; charset=utf-8").send(md);
|
||||
});
|
||||
|
||||
app.get("/r/:shareId/marketing", (req, res) => {
|
||||
const shareId = String(req.params.shareId || "").trim();
|
||||
if (!shareId) return res.status(404).type("text/plain").send("Not found");
|
||||
const share = readShare(sharesDir, shareId);
|
||||
if (!share?.jobId || !looksLikeUuid(share.jobId)) return res.status(404).type("text/plain").send("Not found");
|
||||
const job = readJob(jobsDir, share.jobId);
|
||||
if (!job?.outputPath) return res.status(404).type("text/plain").send("Not found");
|
||||
|
||||
const abs = path.resolve(projectRoot, job.outputPath);
|
||||
if (!abs.startsWith(outputsDir + path.sep)) return res.status(400).type("text/plain").send("Bad path");
|
||||
if (!fs.existsSync(abs)) return res.status(404).type("text/plain").send("Not found");
|
||||
const dossierMarkdown = fs.readFileSync(abs, "utf8");
|
||||
|
||||
const staticSource = ensureStaticSourceFile({ job, uploadsDir, staticSourceDir, projectRoot });
|
||||
const publicBaseUrl = publicBaseFromRequest(req, "red-team.infrafabric.io");
|
||||
const staticPublicBaseUrl = staticPublicBaseUrlForRequest(req, publicBaseUrl);
|
||||
const staticSourceUrl = staticSource ? `${staticPublicBaseUrl}${staticSource.urlPath}` : "";
|
||||
|
||||
const md = renderMarketingPackMarkdown({
|
||||
shareId,
|
||||
job,
|
||||
publicBaseUrl,
|
||||
staticPublicBaseUrl,
|
||||
staticSourceUrl,
|
||||
dossierMarkdown,
|
||||
});
|
||||
const html = markdown.render(md);
|
||||
const topLinks = [
|
||||
`<a href="/r/${encodeURIComponent(shareId)}">Back to dossier</a>`,
|
||||
`<a href="/r/${encodeURIComponent(shareId)}/marketing.md">Raw Markdown</a>`,
|
||||
`<a href="/r/${encodeURIComponent(shareId)}/pack">Single-file pack (HTML)</a>`,
|
||||
]
|
||||
.filter(Boolean)
|
||||
.join(" · ");
|
||||
res.status(200).type("text/html; charset=utf-8").send(renderMarkdownPage({ title: "Marketing excerpt", html, topLinksHtml: topLinks }));
|
||||
});
|
||||
|
||||
app.get("/r/:shareId", (req, res) => {
|
||||
const shareId = String(req.params.shareId || "").trim();
|
||||
if (!shareId) return res.status(404).type("text/plain").send("Not found");
|
||||
|
|
@ -1189,7 +1511,11 @@ function main() {
|
|||
`<a href="/r/${encodeURIComponent(shareId)}/download">Download Markdown</a>`,
|
||||
job.sourcePath ? `<a href="/r/${encodeURIComponent(shareId)}/source">Download source</a>` : "",
|
||||
`<a href="/r/${encodeURIComponent(shareId)}/trace">IF.TTT trace</a>`,
|
||||
`<a href="/r/${encodeURIComponent(shareId)}/review-pack">Review pack (HTML)</a>`,
|
||||
`<a href="/r/${encodeURIComponent(shareId)}/review-pack.md">Review pack (MD)</a>`,
|
||||
`<a href="/r/${encodeURIComponent(shareId)}/pack">Single-file pack (HTML)</a>`,
|
||||
`<a href="/r/${encodeURIComponent(shareId)}/pack.md">Single-file pack (MD)</a>`,
|
||||
`<a href="/r/${encodeURIComponent(shareId)}/marketing">Marketing excerpt (HTML)</a>`,
|
||||
`<a href="/r/${encodeURIComponent(shareId)}/marketing.md">Marketing excerpt (MD)</a>`,
|
||||
externalReviewUrl ? `<a href="${escapeHtml(externalReviewUrl)}" target="_blank" rel="noreferrer">Feedback intake (login)</a>` : "",
|
||||
]
|
||||
|
|
@ -1286,6 +1612,20 @@ function main() {
|
|||
job.warnings = warnings ? warnings.trim() : "";
|
||||
job.outputSha256 = await sha256File(absOutputPath);
|
||||
job.status = job.warnings ? "done_with_warnings" : "done";
|
||||
|
||||
// Best-effort: create a registry-signed trace receipt record (PQ-capable).
|
||||
// This must never block publishing; failures degrade gracefully.
|
||||
try {
|
||||
const staticPublicBaseUrl = normalizeBaseUrl(process.env.STATIC_SOURCE_PUBLIC_BASE_URL || "https://infrafabric.io");
|
||||
const ttt = await upsertTttTraceReceipt({ job, shareId, staticPublicBaseUrl });
|
||||
if (ttt.ok && ttt.record) {
|
||||
const summary = summarizeTttRecord(ttt.record);
|
||||
if (summary) job.tttTraceReceipt = summary;
|
||||
}
|
||||
} catch {
|
||||
// ignore
|
||||
}
|
||||
|
||||
writeJob(jobsDir, job);
|
||||
} catch (e) {
|
||||
job.status = "error";
|
||||
|
|
|
|||
|
|
@ -287,6 +287,42 @@ def _looks_like_navigation_heavy_source(text: str) -> bool:
|
|||
return marker_hits >= 6
|
||||
|
||||
|
||||
def _looks_like_cover_subtitle_noise(value: str) -> bool:
|
||||
"""
|
||||
Heuristic: cover subtitles should be "title-ish" (short, headline-like),
|
||||
not body sentences, author blocks, or explanatory prose.
|
||||
"""
|
||||
s = " ".join((value or "").split()).strip()
|
||||
if not s:
|
||||
return True
|
||||
|
||||
# Body fragments often start mid-sentence (lowercase).
|
||||
if re.match(r"^[a-z]", s):
|
||||
return True
|
||||
|
||||
# Cover subtitles should be short; long multi-clause prose is usually body copy.
|
||||
if len(s.split()) > 18:
|
||||
return True
|
||||
|
||||
# Long prose ending with a period is rarely a subtitle for these sources.
|
||||
if s.endswith(".") and len(s) > 60:
|
||||
return True
|
||||
|
||||
low = s.lower()
|
||||
if "from left to right" in low:
|
||||
return True
|
||||
|
||||
# Author/credential blocks (common in analyst PDFs) aren't useful as subtitles.
|
||||
if re.search(r"\b(cissp|ccsk|phd|research director|business value manager)\b", low):
|
||||
return True
|
||||
|
||||
# Many commas in a long line suggests author list / affiliations.
|
||||
if s.count(",") >= 3 and len(s) > 80:
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
|
||||
def _extract_urls(text: str) -> list[str]:
|
||||
urls: list[str] = []
|
||||
for match in _URL_RE.finditer(text):
|
||||
|
|
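Illustrative behaviour of `_looks_like_cover_subtitle_noise` on hypothetical inputs (the strings below are invented examples, not drawn from any real source document; the function itself is defined earlier in this hunk):

```python
# Short, headline-like subtitle: kept.
assert not _looks_like_cover_subtitle_noise("AI Security Posture Management")
# Author/credential block starting mid-sentence: rejected.
assert _looks_like_cover_subtitle_noise("from left to right: Jane Doe, PhD, Research Director")
# Lowercase body fragment ending in a period: rejected.
assert _looks_like_cover_subtitle_noise(
    "teams adopting assistants report faster review cycles this year."
)
```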
@ -627,10 +663,59 @@ def _parse_title_block(lines: list[str]) -> tuple[str, int]:
|
|||
while i < len(lines) and not lines[i].strip():
|
||||
i += 1
|
||||
title_lines: list[str] = []
|
||||
# Title blocks should be short; OCR/PDF extraction sometimes concatenates body text into the "title".
|
||||
# Heuristic: keep up to a few short lines and stop before body-like lines (long sentences, URLs, etc.).
|
||||
max_title_lines = 3
|
||||
max_title_words = 14
|
||||
max_title_chars = 110
|
||||
max_total_words = 18
|
||||
max_total_chars = 120
|
||||
total_words = 0
|
||||
total_chars = 0
|
||||
while i < len(lines) and lines[i].strip():
|
||||
stripped = lines[i].strip()
|
||||
if stripped.lower() != "snyk":
|
||||
lower = stripped.lower()
|
||||
if lower == "snyk":
|
||||
i += 1
|
||||
continue
|
||||
# Skip common page-header noise (e.g., "… | Datasheet 1").
|
||||
if "|" in stripped and "datasheet" in lower:
|
||||
i += 1
|
||||
continue
|
||||
# If the very first non-empty line is already "body-like", synthesize a short title
|
||||
# from it but keep the full line in the body (do not consume it).
|
||||
word_count = len(stripped.split())
|
||||
looks_body_like = (
|
||||
len(stripped) > max_title_chars
|
||||
or word_count > max_title_words
|
||||
or bool(re.search(r"\\s{3,}", stripped))
|
||||
or "http://" in lower
|
||||
or "https://" in lower
|
||||
or (stripped.endswith(".") and word_count > 8)
|
||||
)
|
||||
if not title_lines and looks_body_like:
|
||||
title_lines.append(_compact_title(stripped, max_chars=72))
|
||||
break
|
||||
# Stop title capture when we hit body-like lines.
|
||||
if title_lines:
|
||||
if "http://" in lower or "https://" in lower:
|
||||
break
|
||||
if len(stripped) > max_title_chars:
|
||||
break
|
||||
if word_count > max_title_words:
|
||||
break
|
||||
if stripped.endswith(".") and word_count > 8:
|
||||
break
|
||||
# Global caps: don't let multiple short lines turn into a paragraph-sized title.
|
||||
if total_words + word_count > max_total_words:
|
||||
break
|
||||
if total_chars + len(stripped) > max_total_chars:
|
||||
break
|
||||
if len(title_lines) >= max_title_lines:
|
||||
break
|
||||
title_lines.append(stripped)
|
||||
total_words += len(stripped.split())
|
||||
total_chars += len(stripped)
|
||||
i += 1
|
||||
while i < len(lines) and not lines[i].strip():
|
||||
i += 1
|
||||
|
|
@ -1547,10 +1632,12 @@ def _inferred_mermaid(title: str, *, ctx: _RenderContext) -> str | None:
|
|||
)
|
||||
|
||||
if title_upper.startswith("APPENDIX 1") or "ARCHITECTURE" in title_upper:
|
||||
is_llm_context = any(k in title_upper for k in ["LLM", "MODEL", "RAG", "PROMPT"])
|
||||
return ctx.pick_unique(
|
||||
kind="diagram:architecture",
|
||||
key=title,
|
||||
variants=[
|
||||
(
|
||||
"""flowchart TD
|
||||
A["User"] --> B["App"]
|
||||
B --> C["LLM"]
|
||||
|
|
@ -1559,6 +1646,16 @@ def _inferred_mermaid(title: str, *, ctx: _RenderContext) -> str | None:
|
|||
D --> F["External systems"]
|
||||
E --> C
|
||||
"""
|
||||
if is_llm_context
|
||||
else """flowchart TD
|
||||
A["Actor"] --> B["Workflow / system"]
|
||||
B --> C["Policy decision (rules)"]
|
||||
C --> D["Gate: enforce / block"]
|
||||
D --> E["Evidence signals (logs)"]
|
||||
E --> F["Audit / review cycle"]
|
||||
F --> C
|
||||
"""
|
||||
)
|
||||
],
|
||||
used=ctx.used_diagrams,
|
||||
)
|
||||
|
|
@ -2035,7 +2132,7 @@ def _render_dave_factor_callout(section: _SourceSection, *, ctx: _RenderContext)
|
|||
ctx=ctx,
|
||||
key=section.title,
|
||||
)
|
||||
if "REQUEST EVIDENCE" in title_upper or _has(excerpt, "access request", "screenshot"):
|
||||
if "REQUEST EVIDENCE" in title_upper or _has(excerpt, "screenshot", "attestation"):
|
||||
return _daveify_callout_reframe(
|
||||
"\n".join(
|
||||
[
|
||||
|
|
@ -2097,8 +2194,15 @@ def _render_dave_factor_callout(section: _SourceSection, *, ctx: _RenderContext)
|
|||
)
|
||||
|
||||
if ctx.locale.lower().startswith("fr"):
|
||||
anchors = _extract_numeric_anchors(section.body, limit=2)
|
||||
anchor_hint = f" (repères : {', '.join(anchors)})" if anchors else ""
|
||||
anchors = _extract_numeric_anchors(section.body, limit=4)
|
||||
anchor = ""
|
||||
for candidate in anchors:
|
||||
# Skip citation years in callouts; they read like hallucinated trivia.
|
||||
if re.fullmatch(r"20\d{2}", candidate):
|
||||
continue
|
||||
anchor = candidate
|
||||
break
|
||||
anchor_hint = f" ({anchor})" if anchor else ""
|
||||
variants = [
|
||||
"\n".join(
|
||||
[
|
||||
|
|
@ -2151,8 +2255,15 @@ def _render_dave_factor_callout(section: _SourceSection, *, ctx: _RenderContext)
|
|||
if not section.body.strip():
|
||||
return None
|
||||
|
||||
anchors = _extract_numeric_anchors(excerpt, limit=2)
|
||||
anchor_hint = f" (anchors: {', '.join(anchors)})" if anchors else ""
|
||||
anchors = _extract_numeric_anchors(excerpt, limit=4)
|
||||
anchor = ""
|
||||
for candidate in anchors:
|
||||
# Skip citation years in callouts; they read like hallucinated trivia.
|
||||
if re.fullmatch(r"20\d{2}", candidate):
|
||||
continue
|
||||
anchor = candidate
|
||||
break
|
||||
anchor_hint = f" ({anchor})" if anchor else ""
|
||||
variants = [
|
||||
"\n".join(
|
||||
[
|
||||
|
|
@ -2194,8 +2305,14 @@ def _render_punchline_closer(section: _SourceSection, *, ctx: _RenderContext) ->
|
|||
if not section.body.strip():
|
||||
return None
|
||||
|
||||
anchors = _extract_numeric_anchors(f"{section.why_it_matters or ''}\n{section.body}".strip(), limit=2)
|
||||
anchor = anchors[0] if anchors else ""
|
||||
anchors = _extract_numeric_anchors(f"{section.why_it_matters or ''}\n{section.body}".strip(), limit=4)
|
||||
anchor = ""
|
||||
for candidate in anchors:
|
||||
# Avoid anchoring punchlines to random citation years unless the year is actually part of the section title.
|
||||
if re.fullmatch(r"20\d{2}", candidate) and candidate not in section.title:
|
||||
continue
|
||||
anchor = candidate
|
||||
break
|
||||
anchor_hint = f" ({anchor})" if anchor else ""
|
||||
|
||||
if ctx.locale.lower().startswith("fr"):
|
||||
|
|
@ -2321,6 +2438,17 @@ def _render_section(section: _SourceSection, *, ctx: _RenderContext) -> str:
|
|||
]
|
||||
)
|
||||
elif title_upper.startswith("APPENDIX 1") or "ARCHITECTURE" in title_upper:
|
||||
if ctx.voice == "v2.0":
|
||||
variants = [
|
||||
"Architecture diagrams are where optimism meets the enforcement boundary (and quietly loses).",
|
||||
"Architecture diagrams are forwardable; boundaries are enforceable. Dave prefers the version you can screenshot.",
|
||||
"Architecture diagrams are the happy path. The first exception request is the real design review.",
|
||||
"Architecture diagrams define components; governance defines who can bypass them. Only one survives audit week.",
|
||||
"Architecture diagrams are the part everyone agrees on, until we name what blocks and who owns the exception path.",
|
||||
]
|
||||
key = f"{section.title}:{_sha256_text(section.body)[:8]}"
|
||||
paragraphs.append(ctx.pick_unique(kind="paragraph:architecture", key=key, variants=variants, used=ctx.used_paragraphs))
|
||||
else:
|
||||
paragraphs.extend(
|
||||
[
|
||||
"Architecture diagrams are where optimism goes to be audited.",
|
||||
|
|
@ -2363,6 +2491,17 @@ def _render_section(section: _SourceSection, *, ctx: _RenderContext) -> str:
|
|||
]
|
||||
)
|
||||
elif "SECURITY TEAM" in title_upper or "SECURITY REVIEW" in title_upper:
|
||||
if ctx.voice == "v2.0":
|
||||
variants = [
|
||||
'Security team efficiency is a legitimate goal, especially when queues become the organization’s truth serum. The risk is claiming throughput without defining what “review complete” means or what evidence proves it.',
|
||||
'Faster reviews are defensible; unmeasured reviews are theater. Define “complete,” define the evidence, and make drift visible before the next audit season.',
|
||||
'If the security team is the bottleneck, speed matters. If speed is the metric, definitions matter: what counts as reviewed, and what signal proves it stayed reviewed?',
|
||||
'Throughput improvements only count if “done” is defined. Otherwise we are measuring calendar velocity and calling it assurance.',
|
||||
'Reducing review time is fine. Let’s just avoid the classic move: declare success, then argue about the definition of “review” when incidents arrive.',
|
||||
]
|
||||
key = f"{section.title}:{_sha256_text(section.body)[:8]}"
|
||||
paragraphs.append(ctx.pick_unique(kind="paragraph:sec_team", key=key, variants=variants, used=ctx.used_paragraphs))
|
||||
else:
|
||||
paragraphs.extend(
|
||||
[
|
||||
"Security team efficiency is a legitimate goal, especially when review queues become the organizational truth serum.",
|
||||
|
|
@ -2517,41 +2656,41 @@ def _render_section(section: _SourceSection, *, ctx: _RenderContext) -> str:
|
|||
anchors = _extract_numeric_anchors(section.body, limit=2)
|
||||
if ctx.locale.lower().startswith("fr"):
|
||||
anchor_hint = f" (repères : {', '.join(anchors)})" if anchors else ""
|
||||
display_title = _compact_title(section.title, max_chars=72)
|
||||
variants = [
|
||||
f"Nous sommes alignés sur **{section.title}** comme repère narratif{anchor_hint}, à condition de le traduire en contraintes vérifiables plutôt qu’en langage de confort.",
|
||||
f"**{section.title}**{anchor_hint} est l’endroit où la crédibilité se fabrique ; le risque « Dave » consiste à en faire une séance de ressenti plutôt qu’une frontière d’application.",
|
||||
f"Cette partie (**{section.title}**){anchor_hint} sera citée en réunion. Extraire un responsable de décision et une porte de contrôle, pour que ce soit exécutable, et non simplement inspirant.",
|
||||
f"Dans **{section.title}**{anchor_hint}, on voit le plan devenir « compatible parties prenantes ». La contre-mesure consiste à le retraduire en responsables, échéances et critères de blocage.",
|
||||
f"**{section.title}**{anchor_hint} est le sanctuaire des hypothèses. Les expliciter maintenant évite de les redécouvrir plus tard, au moment où le calendrier devient émotionnellement complexe.",
|
||||
f"Nous aimons l’intention de **{section.title}**{anchor_hint}. Le risque pratique : que cela devienne une diapositive ; la contre-mesure : en faire une liste de contrôle avec date de péremption.",
|
||||
f"**{section.title}**{anchor_hint} promet du réalisme. Rendons-le mesurable : point de départ, écart, et un artefact de preuve qui ne nécessite pas un pèlerinage dans un dossier partagé.",
|
||||
f"Voici **{section.title}**{anchor_hint} : la partie où nous sommes d’accord en principe. Le geste red-team : s’accorder aussi sur ce qui bloque, ce qui alerte, et qui détient l’exception.",
|
||||
f"Nous sommes alignés sur **{display_title}** comme repère narratif{anchor_hint}, à condition de le traduire en contraintes vérifiables plutôt qu’en langage de confort.",
|
||||
f"Cette partie (**{display_title}**){anchor_hint} sera citée en réunion. Extraire un responsable de décision et une porte de contrôle, pour que ce soit exécutable, et non simplement inspirant.",
|
||||
f"Dans **{display_title}**{anchor_hint}, on voit le plan devenir « compatible parties prenantes ». La contre-mesure consiste à le retraduire en responsables, échéances et critères de blocage.",
|
||||
f"**{display_title}**{anchor_hint} est le sanctuaire des hypothèses. Les expliciter maintenant évite de les redécouvrir plus tard, au moment où le calendrier devient émotionnellement complexe.",
|
||||
f"Nous aimons l’intention de **{display_title}**{anchor_hint}. Le risque pratique : que cela devienne une diapositive ; la contre-mesure : en faire une liste de contrôle avec date de péremption.",
|
||||
]
|
||||
else:
|
||||
anchor_hint = f" (notably: {', '.join(anchors)})" if anchors else ""
|
||||
display_title = _compact_title(section.title, max_chars=72)
|
||||
if ctx.voice == "v1.6":
|
||||
variants = [
|
||||
f"**{section.title}**{anchor_hint} will be quoted in meetings. Extract an owner, a gate, and a stop condition so it survives the next review cycle.",
|
||||
f"Treat **{section.title}**{anchor_hint} as a control surface: define what blocks, what warns, and who owns the exception pathway.",
|
||||
f"**{section.title}**{anchor_hint} reads like a plan until it meets incentives. Translate it into constraints before it turns into comfort language.",
|
||||
f"In **{section.title}**{anchor_hint}, the work becomes stakeholder-safe. The counter-move is to make enforcement explicit and exceptions time-bounded.",
|
||||
f"**{section.title}**{anchor_hint} is where assumptions hide. Name them now, or they will reappear later as “unexpected complexity.”",
|
||||
f"**{display_title}**{anchor_hint} will be quoted in meetings. Extract an owner, a gate, and a stop condition so it survives the next review cycle.",
|
||||
f"Treat **{display_title}**{anchor_hint} as a control surface: define what blocks, what warns, and who owns the exception pathway.",
|
||||
f"**{display_title}**{anchor_hint} reads like a plan until it meets incentives. Translate it into constraints before it turns into comfort language.",
|
||||
f"In **{display_title}**{anchor_hint}, the work becomes stakeholder-safe. The counter-move is to make enforcement explicit and exceptions time-bounded.",
|
||||
f"**{display_title}**{anchor_hint} is where assumptions hide. Name them now, or they will reappear later as “unexpected complexity.”",
|
||||
]
|
||||
else:
|
||||
variants = [
|
||||
f"We are aligned on **{section.title}** as a narrative anchor{anchor_hint}, and we recommend turning it into constraints rather than comfort language.",
|
||||
f"**{section.title}** is where credibility is manufactured{anchor_hint}; the Dave failure mode is to treat it as a vibe check instead of a boundary on applicability.",
|
||||
f"This section (**{section.title}**){anchor_hint} will be quoted in meetings. Extract one decision owner and one gate so it becomes executable, not inspirational.",
|
||||
f"In **{section.title}**{anchor_hint}, we can see the plan being translated into stakeholder-safe language. The counter-move is to translate it back into owners, deadlines, and stop conditions.",
|
||||
f"**{section.title}**{anchor_hint} is the spiritual home of assumptions. Make them explicit now, because they will be rediscovered later when timelines get emotionally complex.",
|
||||
f"We love the intent behind **{section.title}**{anchor_hint}. The practical risk is that it becomes a slide; the mitigation is to make it a checklist with an expiry date.",
|
||||
f"**{section.title}**{anchor_hint} reads as a promise of realism. Make realism measurable: baseline, delta, and an evidence artifact that doesn't require a shared drive pilgrimage.",
|
||||
f"This is **{section.title}**{anchor_hint}: the part where we agree in principle. The red-team ask is that we also agree on what blocks, what warns, and who owns the exception path.",
|
||||
f"We are aligned on **{display_title}** as a narrative anchor{anchor_hint}, and we recommend turning it into constraints rather than comfort language.",
|
||||
f"This section (**{display_title}**){anchor_hint} will be quoted in meetings. Extract one decision owner and one gate so it becomes executable, not inspirational.",
|
||||
f"In **{display_title}**{anchor_hint}, we can see the plan being translated into stakeholder-safe language. The counter-move is to translate it back into owners, deadlines, and stop conditions.",
|
||||
f"**{display_title}**{anchor_hint} is the spiritual home of assumptions. Make them explicit now, because they will be rediscovered later when timelines get emotionally complex.",
|
||||
f"We love the intent behind **{display_title}**{anchor_hint}. The practical risk is that it becomes a slide; the mitigation is to make it a checklist with an expiry date.",
|
||||
]
|
||||
|
||||
paragraphs.append(ctx.pick_unique(kind="paragraph:fallback", key=section.title, variants=variants, used=ctx.used_paragraphs))
|
||||
|
||||
out: list[str] = [f"## {section.title}"]
|
||||
raw_title = section.title
|
||||
heading_title = _compact_title(raw_title, max_chars=72) if ctx.voice == "v2.0" else raw_title
|
||||
out: list[str] = [f"## {heading_title}"]
|
||||
if heading_title != raw_title:
|
||||
out.extend(["", f"> {raw_title}"])
|
||||
if section.why_it_matters:
|
||||
out.extend(["", section.why_it_matters, ""])
|
||||
else:
|
||||
|
|
@ -2867,6 +3006,158 @@ def _render_action_pack(sections: list[_SourceSection]) -> str:
|
|||
return "\n".join(out).strip()
|
||||
|
||||
|
||||
def _render_action_pack_v2_0(*, sections: list[_SourceSection], normalized_text: str, locale: str) -> str:
|
||||
"""
|
||||
IF.DAVE v2.0: reduce Action Pack boilerplate by selecting a small set of representative
|
||||
sections (3–5) that cover distinct gates where possible.
|
||||
"""
|
||||
|
||||
candidates = _action_pack_sections(sections)
|
||||
if not candidates:
|
||||
return ""
|
||||
|
||||
# Prefer breadth: pick one best section per gate (by body length), preserving first-seen gate order.
|
||||
by_gate: dict[str, list[_SourceSection]] = {}
|
||||
gate_order: list[str] = []
|
||||
for sec in candidates:
|
||||
gate = _action_pack_gate(sec)
|
||||
if gate not in by_gate:
|
||||
by_gate[gate] = []
|
||||
gate_order.append(gate)
|
||||
by_gate[gate].append(sec)
|
||||
|
||||
selected: list[_SourceSection] = []
|
||||
for gate in gate_order:
|
||||
secs = sorted(by_gate[gate], key=lambda s: len((s.body or "").strip()), reverse=True)
|
||||
if secs:
|
||||
selected.append(secs[0])
|
||||
if len(selected) >= 5:
|
||||
break
|
||||
|
||||
# If we have <3 distinct gates, pad with longest remaining sections (still capped at 5).
|
||||
if len(selected) < 3:
|
||||
def title_key(sec: _SourceSection) -> str:
|
||||
return " ".join((sec.title or "").split()).strip().upper()
|
||||
|
||||
seen_titles = {title_key(s) for s in selected}
|
||||
remaining = [s for s in candidates if s not in selected]
|
||||
remaining.sort(key=lambda s: len((s.body or "").strip()), reverse=True)
|
||||
for sec in remaining:
|
||||
key = title_key(sec)
|
||||
if key in seen_titles:
|
||||
continue
|
||||
selected.append(sec)
|
||||
seen_titles.add(key)
|
||||
if len(selected) >= 3:
|
||||
break
|
||||
selected = selected[:5]
|
||||
|
||||
out: list[str] = [
|
||||
"## Action Pack (Operational)" if not locale.lower().startswith("fr") else "## Action Pack (Opérationnel)",
|
||||
"",
|
||||
"This appendix turns the mirror into Monday-morning work: owners, gates, stop conditions, and evidence artifacts."
|
||||
if not locale.lower().startswith("fr")
|
||||
else "Cet appendice transforme le miroir en travail exécutable : responsables, portes, critères de blocage, et artefacts de preuve.",
|
||||
"Keep it generic and auditable; adapt to your tooling without inventing fake implementation details."
|
||||
if not locale.lower().startswith("fr")
|
||||
else "Restez générique et auditable ; adaptez à vos outils sans inventer de fausses implémentations.",
|
||||
"Minimum telemetry schema (when you claim “verifiable signals”): event_type, emitter, freshness_window, owner."
|
||||
if not locale.lower().startswith("fr")
|
||||
else "Schéma minimum de télémétrie (quand vous promettez des “signaux vérifiables”) : event_type, emitter, freshness_window, owner.",
|
||||
"",
|
||||
"### Control Cards" if not locale.lower().startswith("fr") else "### Cartes de contrôle",
|
||||
]
|
||||
|
||||
for sec in selected:
|
||||
display_title = _compact_title(sec.title, max_chars=72)
|
||||
gate = _action_pack_gate(sec)
|
||||
out.extend(
|
||||
[
|
||||
"",
|
||||
f"#### {display_title}",
|
||||
"",
|
||||
(
|
||||
f'- **Control objective:** Turn "{display_title}" into an enforceable workflow (not a narrative).'
|
||||
if not locale.lower().startswith("fr")
|
||||
else f'- **Objectif de contrôle :** Rendre "{display_title}" opposable (pas seulement narratif).'
|
||||
),
|
||||
f"- **Gate:** {gate}" if not locale.lower().startswith("fr") else f"- **Porte :** {gate}",
|
||||
f"- **Owner (RACI):** {_action_pack_owner(gate)}"
|
||||
if not locale.lower().startswith("fr")
|
||||
else f"- **Responsable (RACI) :** {_action_pack_owner(gate)}",
|
||||
f"- **Stop condition:** {_action_pack_stop_condition(gate)}"
|
||||
if not locale.lower().startswith("fr")
|
||||
else f"- **Critère de blocage :** {_action_pack_stop_condition(gate)}",
|
||||
f"- **Evidence artifact:** {_action_pack_evidence(gate)}"
|
||||
if not locale.lower().startswith("fr")
|
||||
else f"- **Artefact de preuve :** {_action_pack_evidence(gate)}",
|
||||
]
|
||||
)
|
||||
|
||||
out.extend(["", "### Backlog Export (Jira-ready)" if not locale.lower().startswith("fr") else "### Backlog (Jira-ready)", ""])
|
||||
for idx, sec in enumerate(selected, 1):
|
||||
gate = _action_pack_gate(sec)
|
||||
display_title = _compact_title(sec.title, max_chars=72)
|
||||
out.extend(
|
||||
[
|
||||
f"{idx}. [{gate}] {display_title}: define owner, gate, and stop condition"
|
||||
if not locale.lower().startswith("fr")
|
||||
else f"{idx}. [{gate}] {display_title} : définir propriétaire, porte, et critère de blocage",
|
||||
(
|
||||
" - Acceptance: owner assigned; stop condition documented and approved."
|
||||
if not locale.lower().startswith("fr")
|
||||
else " - Acceptance : responsable assigné ; critère de blocage documenté et approuvé."
|
||||
),
|
||||
(
|
||||
" - Acceptance: evidence artifact defined and stored (machine-generated where possible)."
|
||||
if not locale.lower().startswith("fr")
|
||||
else " - Acceptance : artefact de preuve défini et stocké (machine-généré si possible)."
|
||||
),
|
||||
(
|
||||
" - Acceptance: exceptions require owner + expiry; expiry is enforced automatically."
|
||||
if not locale.lower().startswith("fr")
|
||||
else " - Acceptance : exceptions = responsable + expiration ; expiration appliquée automatiquement."
|
||||
),
|
||||
]
|
||||
)
|
||||
|
||||
out.extend(
|
||||
[
|
||||
"",
|
||||
"### Policy-as-Code Appendix (pseudo-YAML)" if not locale.lower().startswith("fr") else "### Annexe policy-as-code (pseudo-YAML)",
|
||||
"",
|
||||
"```yaml",
|
||||
"gates:",
|
||||
" pr:",
|
||||
" - name: \"risk scanning\"",
|
||||
" stop_condition: \"block on high severity (or unknown)\"",
|
||||
" evidence: \"scan_event_id + policy_version\"",
|
||||
" access:",
|
||||
" - name: \"assistant enablement\"",
|
||||
" prerequisite: \"device baseline + local scan signal\"",
|
||||
" stop_condition: \"deny when signals missing\"",
|
||||
" evidence: \"access_grant_event + prerequisite_check\"",
|
||||
" runtime:",
|
||||
" - name: \"tool-use\"",
|
||||
" prerequisite: \"allowlist + validation\"",
|
||||
" stop_condition: \"block disallowed actions\"",
|
||||
" evidence: \"execution_log_id + allowlist_version\"",
|
||||
"exceptions:",
|
||||
" expiry_days: 14",
|
||||
" require_owner: true",
|
||||
" require_reason: true",
|
||||
"evidence:",
|
||||
" freshness_days: 30",
|
||||
" require_hash: true",
|
||||
"```",
|
||||
]
|
||||
)
|
||||
|
||||
# Standards sources: translation table lives in the main body; Action Pack remains minimal and opposable.
|
||||
_ = normalized_text
|
||||
return "\n".join(out).strip()
|
||||
|
||||
|
||||
def _generate_dave_v1_2_mirror(*, source_text: str, source_path: str, action_pack: bool, locale: str) -> str:
|
||||
today = _dt.date.today().isoformat()
|
||||
normalized = _normalize_ocr(source_text)
|
||||
|
|
@ -2932,7 +3223,8 @@ def _generate_dave_v1_2_mirror(*, source_text: str, source_path: str, action_pac
|
|||
f"## {cover_h1}",
|
||||
]
|
||||
if cover_h2:
|
||||
out.extend([f"### {cover_h2}", ""])
|
||||
cover_h2_out = _compact_title(cover_h2, max_chars=90) if style_version == "v2.0" else cover_h2
|
||||
out.extend([f"### {cover_h2_out}", ""])
|
||||
else:
|
||||
out.append("")
|
||||
|
||||
|
|
@ -3068,8 +3360,16 @@ def _generate_dave_v1_3_mirror(*, source_text: str, source_path: str, action_pac
|
|||
f"## {cover_h1}",
|
||||
]
|
||||
)
|
||||
cover_h2_out = ""
|
||||
if cover_h2:
|
||||
out.extend([f"### {cover_h2}", ""])
|
||||
if style_version == "v2.0":
|
||||
# Avoid rendering body fragments / author blocks as a subtitle.
|
||||
if not _looks_like_cover_subtitle_noise(cover_h2):
|
||||
cover_h2_out = _compact_title(cover_h2, max_chars=90)
|
||||
else:
|
||||
cover_h2_out = cover_h2
|
||||
if cover_h2_out:
|
||||
out.extend([f"### {cover_h2_out}", ""])
|
||||
else:
|
||||
out.append("")
|
||||
|
||||
|
|
@ -3189,8 +3489,15 @@ def _generate_dave_v1_6_mirror(*, source_text: str, source_path: str, action_pac
|
|||
f"## {cover_h1}",
|
||||
]
|
||||
)
|
||||
cover_h2_out = ""
|
||||
if cover_h2:
|
||||
out.extend([f"### {cover_h2}", ""])
|
||||
if style_version == "v2.0":
|
||||
if not _looks_like_cover_subtitle_noise(cover_h2):
|
||||
cover_h2_out = _compact_title(cover_h2, max_chars=90)
|
||||
else:
|
||||
cover_h2_out = cover_h2
|
||||
if cover_h2_out:
|
||||
out.extend([f"### {cover_h2_out}", ""])
|
||||
else:
|
||||
out.append("")
|
||||
|
||||
|
|
@@ -3319,8 +3626,15 @@ def _generate_dave_v1_7_mirror(*, source_text: str, source_path: str, action_pac
            f"## {cover_h1}",
        ]
    )
    cover_h2_out = ""
    if cover_h2:
        out.extend([f"### {cover_h2}", ""])
        if style_version == "v2.0":
            if not _looks_like_cover_subtitle_noise(cover_h2):
                cover_h2_out = _compact_title(cover_h2, max_chars=90)
        else:
            cover_h2_out = cover_h2
        if cover_h2_out:
            out.extend([f"### {cover_h2_out}", ""])
    else:
        out.append("")

@@ -3428,9 +3742,13 @@ def _extract_claim_lines(*, normalized_text: str, max_items: int = 12) -> list[s
            return False
        # Avoid internal extraction artifacts and navigation noise.
        lower = s.lower()
        # Avoid low-signal page headers like "… | Datasheet 1".
        if "datasheet" in lower:
            return False
        if "trace id" in lower:
            return False
        if lower.startswith("http://") or lower.startswith("https://"):
        # Claims register is for measurable statements, not links (URLs cause broken rendering/quoting).
        if "http://" in lower or "https://" in lower:
            return False
        if lower in {"markdown content:", "url source:"}:
            return False

@@ -3442,7 +3760,18 @@ def _extract_claim_lines(*, normalized_text: str, max_items: int = 12) -> list[s
    for ln in lines:
        if not keep(ln):
            continue
        if not re.search(r"\d", ln) and "%" not in ln and "$" not in ln:
        if not re.search(r"\d", ln) and "%" not in ln and "$" not in ln and "€" not in ln and "£" not in ln:
            continue
        # Numeric guardrail: ignore lines where the only digits are untyped singletons (e.g., "Datasheet 1").
        lower = ln.lower()
        has_multi_digit = bool(re.search(r"\b\d{2,}\b", ln))
        has_typed_singleton = bool(
            re.search(
                r"\b\d+\s*(?:ms|s|sec|secs|seconds|min|mins|minutes|hour|hours|day|days|x|gb|tb|mb|kb)\b",
                lower,
            )
        )
        if not ("%" in ln or "$" in ln or "€" in ln or "£" in ln or has_multi_digit or has_typed_singleton):
            continue
        # Skip obviously broken glyph runs.
        if sum(1 for ch in ln if " " <= ch <= "~") < max(8, int(len(ln) * 0.5)):
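
The guardrail in this hunk keeps a line only if it carries a currency or percent sign, a multi-digit number, or a unit-typed singleton, which is what filters "Datasheet 1"-style headers out of the Claims Register. The snippet below is a standalone paraphrase of that test for illustration only; it is not part of the diff.

```python
# Standalone paraphrase of the numeric guardrail above (illustration only).
import re

def looks_measurable(ln: str) -> bool:
    lower = ln.lower()
    has_multi_digit = bool(re.search(r"\b\d{2,}\b", ln))
    has_typed_singleton = bool(
        re.search(r"\b\d+\s*(?:ms|s|sec|secs|seconds|min|mins|minutes|hour|hours|day|days|x|gb|tb|mb|kb)\b", lower)
    )
    return "%" in ln or "$" in ln or "€" in ln or "£" in ln or has_multi_digit or has_typed_singleton

assert looks_measurable("Handles 250 requests per second")      # multi-digit number
assert looks_measurable("Median latency is 8 ms at the edge")   # unit-typed singleton
assert not looks_measurable("Datasheet 1")                      # untyped singleton is dropped
```
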
@@ -3508,6 +3837,404 @@ def _render_translation_table(*, normalized_text: str, locale: str) -> str:
    return "\n".join(out).strip()


def _normalize_mermaid_block(text: str) -> str:
    return "\n".join([ln.rstrip() for ln in str(text).strip().splitlines() if ln.strip()])


def _diagram_label(diagram: str, *, locale: str) -> str:
    d = diagram.lower()
    if "pendingreview" in d or "exception" in d:
        return "Stase d’exception" if locale.lower().startswith("fr") else "Exception stasis"
    if "screenshot" in d or "artifact" in d or "evidence" in d or "attestation" in d:
        return "Boucle de dérive de preuve" if locale.lower().startswith("fr") else "Evidence drift loop"
    if "policy decision point" in d or "pdp" in d or "policy enforcement point" in d or "pep" in d:
        return "Chaîne PDP/PEP" if locale.lower().startswith("fr") else "PDP/PEP chain"
    if "rag store" in d or ("llm" in d and "tools" in d):
        return "Architecture LLM + outils" if locale.lower().startswith("fr") else "LLM toolchain architecture"
    if "questionnaire" in d or "vendor" in d or "third-party" in d:
        return "Boucle tiers / fournisseurs" if locale.lower().startswith("fr") else "Third‑party loop"
    return "Boucle de contrôle (inférée)" if locale.lower().startswith("fr") else "Inferred control loop"


def _apply_dave_v2_0_postprocess(md: str, *, locale: str) -> str:
    """
    IF.DAVE v2.0 hardening pass:
    - Limit Dave Factor callouts to 1–2 per dossier (keep highest-signal sections).
    - Deduplicate Mermaid diagrams: render each unique diagram once in an Annex and reference it in-body.
    """

    lines = md.splitlines()

    # Identify the footer separator (last '---') so we can insert an Annex above it.
    footer_sep_idx = None
    in_fence = False
    for idx, ln in enumerate(lines):
        stripped = ln.strip()
        if stripped.startswith("```"):
            in_fence = not in_fence
            continue
        if in_fence:
            continue
        if stripped == "---":
            footer_sep_idx = idx
    if footer_sep_idx is None:
        footer_sep_idx = len(lines)

    # 1) Callout budget: find callout blocks and keep up to 2.
    callouts: list[dict] = []
    current_section = ""
    i = 0
    while i < len(lines):
        ln = lines[i]
        stripped = ln.strip()
        if stripped.startswith("## "):
            current_section = stripped[3:].strip()
        is_callout = stripped.startswith("> **The Dave Factor:**") or stripped.startswith("> **Le facteur Dave :**")
        if is_callout:
            start = i
            j = i
            while j < len(lines) and lines[j].strip().startswith(">"):
                j += 1
            block_text = "\n".join(lines[start:j]).strip()
            callouts.append({"start": start, "end": j, "section": current_section, "text": block_text})
            i = j
            continue
        i += 1

    def score_callout(section_title: str, text: str) -> int:
        s = (section_title or "").upper()
        t = (text or "").lower()
        score = 0
        if any(k in s for k in ["REQUEST EVIDENCE", "ACCESS REQUEST", "LOCAL SECURITY"]):
            score += 120
        if "screenshot" in t or "attestation" in t:
            score += 110
        if "AUDIT" in s or "COMPLIANCE" in s:
            score += 95
        if "TRAINING" in s or "quiz" in t or "certificate" in t:
            score += 75
        if any(k in t for k in ["fips", "piv", "fido", "aal"]):
            score += 70
        if "roadmap" in t or "alignment session" in t:
            score += 25
        return score

    keep_callouts: set[int] = set()
    if callouts:
        ranked = sorted(
            enumerate(callouts),
            key=lambda it: (-score_callout(it[1]["section"], it[1]["text"]), it[1]["start"]),
        )
        seen_hashes: set[str] = set()
        for idx, c in ranked:
            sig_lines: list[str] = []
            for ln in str(c["text"]).splitlines():
                s = ln.strip()
                if s.startswith("> **The Dave Factor:**") or s.startswith("> **Le facteur Dave :**"):
                    sig_lines.append(s)
                if s.startswith("> **Countermeasure:**") or s.startswith("> **Contre-mesure :**"):
                    sig_lines.append(s)
                if len(sig_lines) >= 2:
                    break
            signature = "\n".join(sig_lines).strip() or str(c["text"]).strip()
            h = _sha256_text(signature)
            if h in seen_hashes:
                continue
            keep_callouts.add(idx)
            seen_hashes.add(h)
            if len(keep_callouts) >= 2:
                break

    # 2) Mermaid dedupe: collect all mermaid code fences, remove in-body blocks and replace with references.
    diagrams: list[dict] = []
    current_section = ""
    in_other_fence = False
    i = 0
    while i < len(lines):
        stripped = lines[i].strip()
        if stripped.startswith("```") and stripped != "```mermaid":
            in_other_fence = not in_other_fence
            i += 1
            continue
        if in_other_fence:
            i += 1
            continue
        if stripped.startswith("## "):
            current_section = stripped[3:].strip()
            i += 1
            continue
        if stripped == "```mermaid":
            start = i
            j = i + 1
            while j < len(lines) and lines[j].strip() != "```":
                j += 1
            end = min(len(lines), j + 1)
            raw = "\n".join(lines[start + 1 : j])
            norm = _normalize_mermaid_block(raw)
            if norm:
                diagrams.append(
                    {
                        "start": start,
                        "end": end,
                        "section": current_section,
                        "raw": raw.rstrip(),
                        "norm": norm,
                    }
                )
            i = end
            continue
        i += 1

    unique_diagrams: list[dict] = []
    seen: set[str] = set()
    for dgm in diagrams:
        if dgm["norm"] in seen:
            continue
        seen.add(dgm["norm"])
        unique_diagrams.append(dgm)

    # Guarantee at least two diagrams by adding safe inferred defaults if needed.
    def ensure_default_diagram(content: str) -> None:
        norm = _normalize_mermaid_block(content)
        if not norm or norm in seen:
            return
        seen.add(norm)
        unique_diagrams.append({"raw": content.rstrip(), "norm": norm, "section": ""})

    if len(unique_diagrams) < 2:
        ensure_default_diagram(
            "\n".join(
                [
                    "flowchart TD",
                    '    A["Control intent"] --> B["Manual evidence requested"]',
                    '    B --> C["Artifact produced"]',
                    '    C --> D["Dashboard goes green"]',
                    '    D --> E["Exceptions accumulate"]',
                    '    E --> F["Definition of compliance shifts"]',
                    "    F --> B",
                ]
            )
        )
    if len(unique_diagrams) < 2:
        ensure_default_diagram(
            "\n".join(
                [
                    "stateDiagram-v2",
                    "    [*] --> Requested",
                    '    Requested --> PendingReview: \"needs alignment\"',
                    "    PendingReview --> PendingReview: renewal",
                    "    PendingReview --> Approved: silence",
                    '    Approved --> Approved: \"temporary\" extension',
                ]
            )
        )

    # Build stable diagram names.
    label_counts: dict[str, int] = {}
    diagram_names: dict[str, str] = {}
    for dgm in unique_diagrams:
        label = _diagram_label(dgm.get("raw", ""), locale=locale)
        label_counts[label] = label_counts.get(label, 0) + 1
    used_seq: dict[str, int] = {}
    for dgm in unique_diagrams:
        label = _diagram_label(dgm.get("raw", ""), locale=locale)
        used_seq[label] = used_seq.get(label, 0) + 1
        suffix = f" ({used_seq[label]})" if label_counts.get(label, 0) > 1 else ""
        diagram_names[dgm["norm"]] = f"{label}{suffix}"

    # Rebuild document: remove callouts beyond budget, remove mermaid blocks, and insert Annex before footer.
    remove_ranges: list[tuple[int, int]] = []
    for idx, c in enumerate(callouts):
        if idx not in keep_callouts:
            remove_ranges.append((c["start"], c["end"]))
    for dgm in diagrams:
        # Remove the optional diagram heading directly above inferred diagrams (best effort).
        start = dgm["start"]
        maybe_heading = start - 2
        if maybe_heading >= 0:
            h0 = lines[maybe_heading].strip()
            h1 = lines[maybe_heading + 1].strip() if maybe_heading + 1 < len(lines) else ""
            if h0.startswith("###") and "diagram" in h0.lower() and h1 == "":
                start = maybe_heading
        remove_ranges.append((start, dgm["end"]))

    # Merge / normalize ranges.
    remove_ranges.sort()
    merged: list[tuple[int, int]] = []
    for start, end in remove_ranges:
        if not merged or start > merged[-1][1]:
            merged.append((start, end))
        else:
            merged[-1] = (merged[-1][0], max(merged[-1][1], end))

    out_lines: list[str] = []
    i = 0
    range_idx = 0
    referenced: set[str] = set()
    while i < len(lines):
        if range_idx < len(merged) and i == merged[range_idx][0]:
            end = merged[range_idx][1]
            # If this range was a diagram, replace with a reference line (based on the diagram norm if we can find it).
            # Best effort: find the mermaid start inside this range.
            ref = None
            for dgm in diagrams:
                if dgm["start"] >= merged[range_idx][0] and dgm["end"] <= merged[range_idx][1]:
                    name = diagram_names.get(dgm["norm"])
                    if name:
                        if dgm["norm"] not in referenced:
                            ref = f"See Annex: {name}."
                            referenced.add(dgm["norm"])
                    break
            if ref:
                out_lines.append(ref)
                out_lines.append("")
            i = end
            range_idx += 1
            continue
        out_lines.append(lines[i])
        i += 1

    # Remove empty/legacy inferred-diagram annex headings (we insert our own).
    legacy_annex_titles = {
        "## Annex (inferred diagrams)",
        "## Annexes (diagrammes inférés)",
    }
    cleaned: list[str] = []
    in_legacy_annex = False
    for ln in out_lines:
        stripped = ln.strip()
        if stripped in legacy_annex_titles:
            in_legacy_annex = True
            continue
        if in_legacy_annex and stripped == "---":
            in_legacy_annex = False
            cleaned.append(ln)
            continue
        if in_legacy_annex and stripped.startswith("## "):
            in_legacy_annex = False
        if in_legacy_annex:
            continue
        cleaned.append(ln)

    # Insert annex right before footer separator.
    footer_sep_idx = None
    in_fence = False
    for idx, ln in enumerate(cleaned):
        stripped = ln.strip()
        if stripped.startswith("```"):
            in_fence = not in_fence
            continue
        if in_fence:
            continue
        if stripped == "---":
            footer_sep_idx = idx
    if footer_sep_idx is None:
        footer_sep_idx = len(cleaned)

    if locale.lower().startswith("fr"):
        annex_title = "## Annexes (actifs partagés)"
        annex_note = "_Diagrammes dédupliqués : chaque schéma unique est rendu une fois ici ; les sections y renvoient._"
        diag_title = "### Diagrammes (dédupliqués)"
    else:
        annex_title = "## Annex (shared assets)"
        annex_note = "_Deduped diagrams: each unique diagram is rendered once here; sections reference it by name._"
        diag_title = "### Diagrams (deduped)"

    annex_lines = [annex_title, "", annex_note, "", diag_title, ""]
    for dgm in unique_diagrams:
        name = diagram_names.get(dgm["norm"]) or _diagram_label(dgm.get("raw", ""), locale=locale)
        annex_lines.extend([f"#### {name}", "", "```mermaid", dgm.get("raw", "").rstrip(), "```", ""])

    final_lines = cleaned[:footer_sep_idx] + [""] + annex_lines + cleaned[footer_sep_idx:]
    return "\n".join([ln.rstrip() for ln in final_lines]).strip() + "\n"


def _merge_consecutive_sections_by_title(sections: list[_SourceSection]) -> list[_SourceSection]:
    """
    Extraction sometimes yields many page-level "sections" with the same repeated header title.
    For v2.0+ we merge consecutive equal-titled sections to prevent template repetition.
    """

    if len(sections) < 3:
        return sections

    def norm(title: str) -> str:
        s = " ".join((title or "").split()).strip()
        s = re.sub(r"https?://\S+", "", s).strip()
        s = re.sub(r"(?i)\bthis publication is available free of charge from\b:?.*$", "", s).strip()
        s = " ".join(s.split()).strip()
        return s.upper()

    merged: list[_SourceSection] = [sections[0]]
    for sec in sections[1:]:
        if merged and norm(sec.title) == norm(merged[-1].title):
            prev = merged[-1]
            body = "\n\n".join([prev.body.strip(), sec.body.strip()]).strip()
            why_prev = (prev.why_it_matters or "").strip()
            why_new = (sec.why_it_matters or "").strip()
            why = why_prev or why_new or None
            if why_prev and why_new and why_new not in why_prev:
                why = "\n".join([why_prev, why_new]).strip()
            merged[-1] = _SourceSection(title=prev.title, body=body, why_it_matters=why)
        else:
            merged.append(sec)
    return merged


def _merge_repeated_titles_globally(sections: list[_SourceSection], *, min_repeats: int = 3) -> list[_SourceSection]:
    """
    If a title repeats many times across extracted sections, it's usually a page header.
    Merge all instances into the first occurrence to avoid template repetition.
    """

    if len(sections) < 3:
        return sections

    def norm(title: str) -> str:
        s = " ".join((title or "").split()).strip()
        s = re.sub(r"https?://\S+", "", s).strip()
        s = re.sub(r"(?i)\bthis publication is available free of charge from\b:?.*$", "", s).strip()
        s = " ".join(s.split()).strip()
        return s.upper()

    counts: dict[str, int] = {}
    for sec in sections[1:]:
        key = norm(sec.title)
        if not key:
            continue
        counts[key] = counts.get(key, 0) + 1

    repeated = {k for k, n in counts.items() if n >= min_repeats}
    if not repeated:
        return sections

    out: list[_SourceSection] = [sections[0]]
    merged_by_title: dict[str, _SourceSection] = {}
    out_idx_by_title: dict[str, int] = {}
    for sec in sections[1:]:
        key = norm(sec.title)
        if key in repeated:
            if key not in out_idx_by_title:
                out_idx_by_title[key] = len(out)
                merged_by_title[key] = sec
                out.append(sec)
            else:
                out_idx = out_idx_by_title[key]
                prev = merged_by_title[key]
                body = "\n\n".join([prev.body.strip(), sec.body.strip()]).strip()
                why_prev = (prev.why_it_matters or "").strip()
                why_new = (sec.why_it_matters or "").strip()
                why = why_prev or why_new or None
                if why_prev and why_new and why_new not in why_prev:
                    why = "\n".join([why_prev, why_new]).strip()
                merged = _SourceSection(title=prev.title, body=body, why_it_matters=why)
                merged_by_title[key] = merged
                out[out_idx] = merged
        else:
            out.append(sec)
    return out
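
As a small illustration of the global merge (hypothetical data; it assumes `_SourceSection` is the section type used elsewhere in this module, with `title`, `body`, and `why_it_matters` fields as the code above constructs it):

```python
# Illustration only: a repeated page-header title collapses into one section (min_repeats=3).
pages = [
    _SourceSection(title="Cover", body="Cover page", why_it_matters=None),
    _SourceSection(title="Secure AI Deployment Guide", body="Page 1 body", why_it_matters="Scopes the profile."),
    _SourceSection(title="Threats", body="Threat model", why_it_matters=None),
    _SourceSection(title="Secure AI Deployment Guide", body="Page 2 body", why_it_matters=None),
    _SourceSection(title="Secure AI Deployment Guide", body="Page 3 body", why_it_matters=None),
]
merged = _merge_repeated_titles_globally(pages)
# The repeated title appears 3 times after the cover, so its bodies are joined into the
# first occurrence and the output drops from 5 sections to 3.
```
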


def _generate_dave_v1_8_mirror(*, source_text: str, source_path: str, action_pack: bool, locale: str, style_version: str = "v1.8") -> str:
    today = _dt.date.today().isoformat()
    normalized = _normalize_ocr(source_text)

@@ -3519,6 +4246,9 @@ def _generate_dave_v1_8_mirror(*, source_text: str, source_path: str, action_pac
    action_pack_enabled = (not _truthy_env("REVOICE_NO_ACTION_PACK")) or bool(action_pack) or _truthy_env("REVOICE_ACTION_PACK")

    sections = _extract_sections(normalized)
    if style_version == "v2.0":
        sections = _merge_consecutive_sections_by_title(sections)
        sections = _merge_repeated_titles_globally(sections)
    if not sections:
        raise ValueError("No content extracted from source")
    if len(sections) == 1:

@@ -3616,8 +4346,15 @@ def _generate_dave_v1_8_mirror(*, source_text: str, source_path: str, action_pac
            f"## {cover_h1}",
        ]
    )
    cover_h2_out = ""
    if cover_h2:
        out.extend([f"### {cover_h2}", ""])
        if style_version == "v2.0":
            if not _looks_like_cover_subtitle_noise(cover_h2):
                cover_h2_out = _compact_title(cover_h2, max_chars=90)
        else:
            cover_h2_out = cover_h2
        if cover_h2_out:
            out.extend([f"### {cover_h2_out}", ""])
    else:
        out.append("")

@@ -3663,6 +4400,9 @@ def _generate_dave_v1_8_mirror(*, source_text: str, source_path: str, action_pac
    out.extend([table, ""])

    if action_pack_enabled:
        if style_version == "v2.0":
            out.append(_render_action_pack_v2_0(sections=sections[1:], normalized_text=normalized, locale=locale))
        else:
            out.append(_render_action_pack(sections[1:]))
        out.append("")

@@ -3735,4 +4475,7 @@ def _generate_dave_v1_8_mirror(*, source_text: str, source_path: str, action_pac
        ]
    )

    return "\n".join(out).strip() + "\n"
    doc = "\n".join(out).strip() + "\n"
    if style_version == "v2.0":
        return _apply_dave_v2_0_postprocess(doc, locale=locale)
    return doc

@@ -130,15 +130,33 @@ def _revoice_preflight(*, style: str, md_path: Path, source_path: Path) -> str:


def _extract_first_claim(md: str) -> str:
    claims: list[str] = []
    for line in md.splitlines():
        m = re.match(r"^- The source claims: [“\"](?P<q>.+?)[”\"]\s*$", line.strip())
        if m:
            claim = m.group("q").strip()
            if len(claim) > 160:
                return claim[:157].rstrip() + "…"
            return claim
            claims.append(m.group("q").strip())

    if not claims:
        return ""

    def is_low_signal(claim: str) -> bool:
        c = (claim or "").strip()
        lower = c.lower()
        if "datasheet" in lower:
            return True
        if "all rights reserved" in lower or "copyright" in lower:
            return True
        # Very short fragments tend to be headers/footers or OCR junk.
        if len(c) < 40:
            return True
        return False

    # Prefer the first non-noise claim; fall back to the first claim if all are noisy.
    chosen = next((c for c in claims if not is_low_signal(c)), claims[0])
    if len(chosen) > 160:
        return chosen[:157].rstrip() + "…"
    return chosen
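
With this change the extractor no longer returns the first quoted claim it sees; it collects all of them and skips datasheet headers, copyright lines, and very short fragments. A hypothetical call (the dossier content below is made up for illustration):

```python
# Illustration only: the first claim bullet is low-signal, so the second one is chosen.
dossier = "\n".join(
    [
        "- The source claims: “Datasheet 1”",
        "- The source claims: “The platform blocks 98% of disallowed tool calls before execution.”",
    ]
)
print(_extract_first_claim(dossier))
# -> The platform blocks 98% of disallowed tool calls before execution.
```
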


def _extract_first_dave_factor(md: str) -> str:
    for line in md.splitlines():

@@ -177,7 +195,8 @@ def _write_marketing(
    day_upper = day.day.upper()
    next_label = f"{next_day.day.upper()} — {next_day.edition} {next_day.hashtag}" if next_day else "Next week: new drops."
    dave_factor = _extract_first_dave_factor(dossier_md) or "The control drifts into a status update, and the status update becomes the control."
    claim = _extract_first_claim(dossier_md) or "(no short claim extracted)"
    claim = _extract_first_claim(dossier_md)
    quote_bullet = f"- The source claims: “{claim}”" if claim else "- (Add one short measurable source quote from the dossier’s Claims Register.)"

    lines = [
        f"# Thread Pack — {day_upper} ({day.edition} Edition)",

@@ -207,7 +226,7 @@ def _write_marketing(
        "",
        "## Post 3 — The Source Claims (quote-budget)",
        "",
        f"- The source claims: “{claim}”",
        quote_bullet,
        "",
        "## Post 4 — Sponsor Bumper (mid-thread)",
        "",

@@ -602,14 +621,14 @@ def main() -> int:
    )

    # Week index + full pack.
    m = re.search(r"(v\\d+(?:\\.\\d+)*)", args.style)
    m = re.search(r"(v\d+(?:\.\d+)*)", args.style)
    week_title = m.group(1) if m else args.style
    index_path = build_dir / "index.md"
    _write_week_index(out_path=index_path, week_title=week_title, base_url=args.base_url, days=ordered, source_links=source_links)

    week_pack_path = build_dir / "week.pack.md"
    body_parts = [
        "# InfraFabric External Review Pack — Full Week (v1.9)",
        f"# InfraFabric External Review Pack — Full Week ({week_title})",
        "",
        "This file embeds all daily packs for sandboxed review environments. Review one day at a time.",
        "",