From 082296e4dc8bf6c462e585e9f96596dfd957455c Mon Sep 17 00:00:00 2001 From: danny Date: Tue, 30 Dec 2025 05:33:44 +0000 Subject: [PATCH] Add IF.DAVE v2.2/v2.3 bibles and v2.x dossier hygiene --- AGENTS.md | 7 +- src/revoice/generate.py | 547 ++++++++++++++++++++++++++--- src/revoice/lint.py | 68 +++- style_bibles/IF.DAVE.BIBLE.md | 2 +- style_bibles/IF_DAVE_BIBLE_v2.2.md | 386 ++++++++++++++++++++ style_bibles/IF_DAVE_BIBLE_v2.3.md | 386 ++++++++++++++++++++ 6 files changed, 1343 insertions(+), 53 deletions(-) create mode 100644 style_bibles/IF_DAVE_BIBLE_v2.2.md create mode 100644 style_bibles/IF_DAVE_BIBLE_v2.3.md diff --git a/AGENTS.md b/AGENTS.md index bec235f..34cf74e 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -4,8 +4,8 @@ This repo generates **Shadow Dossiers** by applying versioned style bibles (e.g. ## Current “Dave” baseline -- Latest bible: `style_bibles/IF_DAVE_BIBLE_v2.1.md` (`if://bible/dave/v2.1`) -- Public static copy: https://infrafabric.io/static/hosted/bibles/IF_DAVE_BIBLE_v2.1.md +- Latest bible: `style_bibles/IF_DAVE_BIBLE_v2.3.md` (`if://bible/dave/v2.3`) +- Public static copy: https://infrafabric.io/static/hosted/bibles/IF_DAVE_BIBLE_v2.3.md - v1.8 generator behavior (implemented in `src/revoice/generate.py`): - Adds `MIRROR COMPLETENESS: OK|DEGRADED` (and optional hard fail via `REVOICE_QUALITY_GATE=1`) - Adds `## Claims Register (source-attributed)` for measurable claims (numbers, %, tiers, retention windows) @@ -14,6 +14,9 @@ This repo generates **Shadow Dossiers** by applying versioned style bibles (e.g. 
- v2.1 dossier hygiene: - Adds a stable online source link in the header: `SOURCE DOC (online): https://infrafabric.io/static/source/.pdf` - Does not emit `if://bible/...` URIs in public dossier output (use `https://infrafabric.io/static/...` receipts instead) + - v2.2+ week-pack framing: + - Uses weekday headings (`## Monday`, etc) + `### Company | Report` subtitle for `mon.pdf`…`sun.pdf` inputs + - Adds a short “Time journalist” intro and a plain-text “Podcast Script” block (diagram narration) ## Static hosting (critical trap) diff --git a/src/revoice/generate.py b/src/revoice/generate.py index 9b4aa2b..d0936a6 100644 --- a/src/revoice/generate.py +++ b/src/revoice/generate.py @@ -33,6 +33,8 @@ def generate_shadow_dossier(*, style_id: str, source_text: str, source_path: str "if.dave.v1.9", "if.dave.v2.0", "if.dave.v2.1", + "if.dave.v2.2", + "if.dave.v2.3", "if.dave.fr.v1.2", "if.dave.fr.v1.3", "dave", @@ -46,18 +48,37 @@ def generate_shadow_dossier(*, style_id: str, source_text: str, source_path: str "if://bible/dave/v1.9", "if://bible/dave/v2.0", "if://bible/dave/v2.1", + "if://bible/dave/v2.2", + "if://bible/dave/v2.3", "if://bible/dave/fr/v1.2", "if://bible/dave/fr/v1.3", }: style = style_id.lower() locale = "fr" if style in {"if.dave.fr.v1.2", "if.dave.fr.v1.3", "if://bible/dave/fr/v1.2", "if://bible/dave/fr/v1.3"} else "en" - if style in {"if.dave.v2.0", "if.dave.v2.1", "if://bible/dave/v2.0", "if://bible/dave/v2.1"}: + if style in { + "if.dave.v2.0", + "if.dave.v2.1", + "if.dave.v2.2", + "if.dave.v2.3", + "if://bible/dave/v2.0", + "if://bible/dave/v2.1", + "if://bible/dave/v2.2", + "if://bible/dave/v2.3", + }: return _generate_dave_v1_8_mirror( source_text=source_text, source_path=source_path, action_pack=action_pack, locale=locale, - style_version="v2.1" if style in {"if.dave.v2.1", "if://bible/dave/v2.1"} else "v2.0", + style_version=( + "v2.3" + if style in {"if.dave.v2.3", "if://bible/dave/v2.3"} + else ( + "v2.2" + if style in {"if.dave.v2.2", 
"if://bible/dave/v2.2"} + else ("v2.1" if style in {"if.dave.v2.1", "if://bible/dave/v2.1"} else "v2.0") + ) + ), ) if style in {"if.dave.v1.9", "if://bible/dave/v1.9"}: return _generate_dave_v1_8_mirror( @@ -628,13 +649,14 @@ def _normalize_unicode_punctuation(text: str) -> str: return str(text or "").translate(_UNICODE_QUOTE_MAP) -def _coerce_json_code_block(code: str) -> tuple[str, str, bool]: +def _coerce_json_code_block(code: str) -> tuple[str, str, bool, bool]: cleaned = _normalize_unicode_punctuation(code).strip() + jsonish = cleaned.lstrip().startswith("{") or cleaned.lstrip().startswith("[") try: obj = json.loads(cleaned) except Exception: - return "text", cleaned, False - return "json", json.dumps(obj, indent=2, ensure_ascii=False), True + return "text", cleaned, False, jsonish + return "json", json.dumps(obj, indent=2, ensure_ascii=False), True, True def _parse_pages(source_text: str) -> list[tuple[str, str]]: @@ -2440,7 +2462,7 @@ def _render_section(section: _SourceSection, *, ctx: _RenderContext) -> str: ] ) elif title_upper.startswith("APPENDIX 1") or "ARCHITECTURE" in title_upper: - if ctx.voice in {"v2.0", "v2.1"}: + if ctx.voice in {"v2.0", "v2.1", "v2.2", "v2.3"}: variants = [ "Architecture diagrams are where optimism meets the enforcement boundary (and quietly loses).", "Architecture diagrams are forwardable; boundaries are enforceable. Dave prefers the version you can screenshot.", @@ -2493,7 +2515,7 @@ def _render_section(section: _SourceSection, *, ctx: _RenderContext) -> str: ] ) elif "SECURITY TEAM" in title_upper or "SECURITY REVIEW" in title_upper: - if ctx.voice in {"v2.0", "v2.1"}: + if ctx.voice in {"v2.0", "v2.1", "v2.2", "v2.3"}: variants = [ 'Security team efficiency is a legitimate goal, especially when queues become the organization’s truth serum. The risk is claiming throughput without defining what “review complete” means or what evidence proves it.', 'Faster reviews are defensible; unmeasured reviews are theater. 
Define “complete,” define the evidence, and make drift visible before the next audit season.', @@ -2689,7 +2711,7 @@ def _render_section(section: _SourceSection, *, ctx: _RenderContext) -> str: paragraphs.append(ctx.pick_unique(kind="paragraph:fallback", key=section.title, variants=variants, used=ctx.used_paragraphs)) raw_title = section.title - heading_title = _compact_title(raw_title, max_chars=72) if ctx.voice in {"v2.0", "v2.1"} else raw_title + heading_title = _compact_title(raw_title, max_chars=72) if ctx.voice in {"v2.0", "v2.1", "v2.2", "v2.3"} else raw_title out: list[str] = [f"## {heading_title}"] if heading_title != raw_title: out.extend(["", f"> {raw_title}"]) @@ -2787,10 +2809,14 @@ def _render_section(section: _SourceSection, *, ctx: _RenderContext) -> str: code = _extract_code_block(section.body) if code: - lang, normalized_code, is_valid_json = _coerce_json_code_block(code) + lang, normalized_code, is_valid_json, jsonish = _coerce_json_code_block(code) + # v2.1+ OpSec: do not publish unusable JSON/config snippets. + # If the snippet looks like JSON but cannot be parsed without guessing, omit it silently. + if jsonish and not is_valid_json and ctx.voice in {"v2.0", "v2.1", "v2.2", "v2.3"}: + normalized_code = "" if is_valid_json: out.extend(["", f"```{lang}", normalized_code.strip(), "```"]) - else: + elif normalized_code.strip(): # OpSec: do not label pipeline/extraction artifacts (no "OCR", "unverified", etc.). out.extend(["", "### Source snippet", "", f"```{lang}", normalized_code.strip(), "```"]) @@ -3288,6 +3314,328 @@ def _infer_vertical_line(*, normalized_text: str, source_basename: str, locale: return None +_WEEK_DAY_NAME: dict[str, str] = { + "mon": "Monday", + "tue": "Tuesday", + "wed": "Wednesday", + "thu": "Thursday", + "fri": "Friday", + "sat": "Saturday", + "sun": "Sunday", +} + + +def _week_day_name_from_source_basename(source_basename: str) -> str | None: + """ + Week packs use predictable day filenames (mon.pdf, tue.pdf, ...). 
+ When present, we can produce TV-show friendly headings without guessing. + """ + + try: + p = Path(source_basename) + except Exception: + return None + if p.suffix.lower() != ".pdf": + return None + key = p.stem.strip().lower() + return _WEEK_DAY_NAME.get(key) + + +def _infer_source_company_name(*, normalized_text: str) -> str: + """ + Best-effort brand inference for week-pack sources. + Prefer explicit vendor/org names found in the source text. Fall back to "the vendor". + """ + + t = (normalized_text or "").lower() + if "snyk" in t: + return "Snyk" + if "owasp" in t: + return "OWASP" + if "palo alto" in t or "cortex xsiam" in t: + return "Palo Alto Networks" + if "sentinelone" in t or "singularity" in t: + return "SentinelOne" + if "yubico" in t or "yubikey" in t: + return "Yubico" + if "vanta" in t: + return "Vanta" + if "nist" in t: + return "NIST" + return "the vendor" + + +def _infer_report_short_title(*, normalized_text: str, company: str) -> str: + """ + Pull a short, human-readable title from the cover/first page. + This is used for the TV-show heading line (Company | Report). + """ + + pages = _parse_pages(normalized_text) + first_page = "" + for _no, body in pages: + if (body or "").strip(): + first_page = body + break + if not first_page: + first_page = (normalized_text or "")[:2400] + + stopwords = { + "introduction", + "table of contents", + "contents", + "overview", + "abstract", + "license and usage", + "revision history", + } + title_lines: list[str] = [] + for raw in first_page.splitlines(): + ln = raw.strip() + if not ln: + continue + if ln.startswith("====="): + continue + lower = ln.lower() + if lower in stopwords: + break + if company and lower == company.lower(): + continue + title_lines.append(ln) + if len(title_lines) >= 4: + break + + title = " ".join(title_lines).strip() + title = re.sub(r"\\s+", " ", title).strip() + title = title.strip(" -:—") + # Keep it short; this line is a header, not a paragraph. 
+ if len(title) > 72: + title = _compact_title(title, max_chars=72) + return title or "Source document" + + +def _render_time_journalist_intro( + *, + company: str, + report_title: str, + section_titles: list[str], + locale: str, +) -> list[str]: + if locale.lower().startswith("fr"): + # v2.2+ EN-first: avoid partial FR translation drift until we have a dedicated FR bible for this. + return [] + + topics = [t for t in section_titles if t and not t.strip().upper().startswith("ANNEX")] + topics = topics[:3] + if topics: + topics_line = "; ".join(f"**{_compact_title(t, max_chars=48)}**" for t in topics) + summary = f"In plain terms, the source walks through: {topics_line}." + else: + summary = "In plain terms, the source is a rollout guide with controls, caveats, and optimism." + + return [ + f"We have a soft spot for {company}. Today we’re covering **{report_title}**.", + summary, + "", + "How to read this dossier:", + "- You do not need the PDF open; we quote the source as we go.", + "- We add short Red Team notes where incentives turn controls into theater.", + "- If you want the receipts, start with the source hash and follow the links in the pack/trace.", + "", + "OK. Let’s dig.", + ] + + +def _extract_annex_mermaids(md: str) -> list[tuple[str, str]]: + """ + Return [(diagram_name, mermaid_code)] from the v2.x Annex section. 
+ """ + + diagrams: list[tuple[str, str]] = [] + lines = md.splitlines() + in_annex = False + i = 0 + while i < len(lines): + ln = lines[i].strip() + if ln.startswith("## Annex"): + in_annex = True + i += 1 + continue + if in_annex and ln.startswith("## ") and not ln.startswith("## Annex"): + # Past annex + break + if in_annex and ln.startswith("#### "): + name = ln[5:].strip() + # seek code fence + j = i + 1 + while j < len(lines) and lines[j].strip() != "```mermaid": + if lines[j].strip().startswith("#### "): + break + j += 1 + if j >= len(lines) or lines[j].strip() != "```mermaid": + i += 1 + continue + k = j + 1 + while k < len(lines) and lines[k].strip() != "```": + k += 1 + code = "\n".join(lines[j + 1 : k]).strip() + if code: + diagrams.append((name, code)) + i = k + 1 + continue + i += 1 + return diagrams + + +def _describe_mermaid_for_audio(*, name: str, code: str) -> str: + """ + Minimal, robust narration: describe the node sequence and call out any explicit decision points. + """ + + code_lines = [ln.rstrip() for ln in (code or "").splitlines() if ln.strip()] + first = (code_lines[0] if code_lines else "").strip().lower() + + # Build id->label map for flowcharts/state-ish diagrams. 
+ id_to_label: dict[str, str] = {} + for ln in code_lines: + m = re.match(r"^\\s*([A-Za-z0-9_]+)\\s*\\[\\\"(.+?)\\\"\\]\\s*$", ln) + if m: + id_to_label[m.group(1)] = m.group(2) + continue + m = re.match(r"^\\s*([A-Za-z0-9_]+)\\s*\\[(.+?)\\]\\s*$", ln) + if m: + id_to_label[m.group(1)] = m.group(2) + continue + m = re.match(r"^\\s*([A-Za-z0-9_]+)\\s*\\{(.+?)\\}\\s*$", ln) + if m: + id_to_label[m.group(1)] = m.group(2) + continue + + def label(node_id: str) -> str: + return (id_to_label.get(node_id) or node_id).strip().strip('"').strip() + + if first.startswith("statediagram"): + transitions: list[tuple[str, str, str]] = [] + for ln in code_lines: + m = re.match(r"^\\s*([A-Za-z0-9_\\*\\[\\]]+)\\s*-->\\s*([A-Za-z0-9_\\*\\[\\]]+)(?:\\s*:\\s*(.+))?$", ln) + if not m: + continue + transitions.append((m.group(1).strip(), m.group(2).strip(), (m.group(3) or "").strip())) + if not transitions: + return "State diagram showing a lifecycle that mostly loops in place." + parts = [] + for a, b, note in transitions[:8]: + if note: + parts.append(f"{a} to {b} ({note})") + else: + parts.append(f"{a} to {b}") + return "State transitions: " + "; ".join(parts) + "." + + # Default: flowchart-ish narration. + edges: list[tuple[str, str]] = [] + decisions: list[str] = [] + for ln in code_lines: + m = re.match(r"^\\s*([A-Za-z0-9_]+)\\s*-->\\s*\\|[^|]+\\|\\s*([A-Za-z0-9_]+)\\s*$", ln) + if m: + edges.append((m.group(1), m.group(2))) + continue + m = re.match(r"^\\s*([A-Za-z0-9_]+)\\s*-->\\s*([A-Za-z0-9_]+)\\s*$", ln) + if m: + edges.append((m.group(1), m.group(2))) + continue + m = re.match(r"^\\s*([A-Za-z0-9_]+)\\{(.+?)\\}\\s*$", ln) + if m: + decisions.append(m.group(2).strip()) + + ordered: list[str] = [] + for a, b in edges: + if a not in ordered: + ordered.append(a) + if b not in ordered: + ordered.append(b) + + if not ordered: + return "Flow chart showing a control workflow with loops and exception handling." 
+ + labels = [label(n) for n in ordered[:10]] + seq = " -> ".join(labels) + if decisions: + return f"Flow: {seq}. Decision points include: {', '.join(decisions[:2])}." + return f"Flow: {seq}." + + +def _insert_podcast_script_v2_2( + *, + md: str, + company: str, + report_title: str, + source_doc_url: str, + locale: str, +) -> str: + if locale.lower().startswith("fr"): + return md + + diagrams = _extract_annex_mermaids(md) + diagram_lines: list[str] = [] + for name, code in diagrams: + diagram_lines.extend([f"- {name}: {_describe_mermaid_for_audio(name=name, code=code)}"]) + + script: list[str] = [ + "## Podcast Script (plain text)", + "", + "```text", + f"Today’s declassified dossier covers {company} — {report_title}.", + "", + "You do not need the source PDF open. This dossier quotes the source as it goes, then adds the Red Team lens: where the control turns into theater, and what has to be true for it to hold.", + "", + "If you want to verify later, use the source link and hash in the header.", + ] + if source_doc_url: + script.extend(["", f"Source PDF: {source_doc_url}"]) + + script.extend( + [ + "", + "We will cover the mirrored sections, then summarize the gates and stop conditions in the Action Pack.", + "", + "Diagram walkthrough (describe aloud):", + ] + ) + if diagram_lines: + script.extend(diagram_lines) + else: + script.append("- (No diagrams found.)") + + script.extend( + [ + "", + "Close:", + "If the calendar is the deliverable, then the risk is already in production — and the evidence is still in phase two.", + "```", + "", + ] + ) + + # Insert right before the footer separator (last '---' outside fences). 
+ lines = md.splitlines() + footer_sep_idx = None + in_fence = False + for idx, ln in enumerate(lines): + stripped = ln.strip() + if stripped.startswith("```"): + in_fence = not in_fence + continue + if in_fence: + continue + if stripped == "---": + footer_sep_idx = idx + if footer_sep_idx is None: + footer_sep_idx = len(lines) + + out_lines = lines[:footer_sep_idx] + [""] + script + lines[footer_sep_idx:] + return "\n".join([ln.rstrip() for ln in out_lines]).strip() + "\n" + + def _generate_dave_v1_3_mirror(*, source_text: str, source_path: str, action_pack: bool, locale: str) -> str: today = _dt.date.today().isoformat() normalized = _normalize_ocr(source_text) @@ -3725,59 +4073,126 @@ def _generate_dave_v1_7_mirror(*, source_text: str, source_path: str, action_pac return "\n".join(out).strip() + "\n" -def _extract_claim_lines(*, normalized_text: str, max_items: int = 12) -> list[str]: +def _extract_claim_lines(*, normalized_text: str, max_items: int = 7) -> list[str]: lines = [ln.strip() for ln in normalized_text.splitlines()] - claims: list[str] = [] seen: set[str] = set() - def keep(s: str) -> bool: + def should_join(prev: str, nxt: str) -> bool: + if not nxt: + return False + prev_s = prev.rstrip() + nxt_s = nxt.lstrip() + if not prev_s or not nxt_s: + return False + # Classic PDF hyphenation: "AI-" + "generated". + if prev_s.endswith("-") and not prev_s.endswith(" -"): + return True + # If the previous line doesn't look sentence-terminated, allow one-line continuation. + if not prev_s.endswith((".", "!", "?", ":", "…")) and len(prev_s) < 160: + # Continuation lines often start lower-case, digits, or punctuation. + if nxt_s[:1].islower() or nxt_s[:1].isdigit() or nxt_s.startswith(("(", "[", "{", "“", "\"", "’", "'")): + return True + return False + + def base_filter(s: str) -> bool: if not s or len(s) < 14: return False - # Avoid internal extraction artifacts and navigation noise. - lower = s.lower() - # Avoid low-signal page headers like "… | Datasheet 1". 
- if "datasheet" in lower: - return False - if "trace id" in lower: - return False - # Claims register is for measurable statements, not links (URLs cause broken rendering/quoting). + lower = s.lower().strip() if "http://" in lower or "https://" in lower: return False if lower in {"markdown content:", "url source:"}: return False + if "trace id" in lower: + return False + # Page markers from extract stage. + if "===== page-" in lower or lower.startswith("====="): + return False + # Avoid email-heavy table rows (these belong in the mirrored table, not in the claims register). + if "@" in s and not any(tok in s for tok in ["%", "$", "€", "£"]): + return False + # Skip obvious code/config fragments. + if any(ch in s for ch in ["{", "}", "```"]): + return False # Avoid pure page numbers. if s.isdigit() and len(s) <= 4: return False return True - for ln in lines: - if not keep(ln): + def score_claim(s: str) -> int: + lower = s.lower() + score = 0 + if "%" in s or "$" in s or "€" in s or "£" in s: + score += 3 + if re.search(r"\b\d{3,}\b", s): + score += 2 + if re.search(r"\b\d{2,}\b", s): + score += 1 + if re.search(r"\b\d+\s*(?:ms|sec|secs|seconds|min|mins|minutes|hour|hours|day|days|week|weeks|month|months|year|years|gb|tb|mb|kb|x)\b", lower): + score += 2 + if any(kw in lower for kw in ["study", "studies", "roi", "increase", "decrease", "reduction", "saves", "save", "cost", "faster", "fidelity"]): + score += 1 + # Penalties for common low-signal junk. 
+ if "datasheet" in lower: + score -= 5 + if "screenshot" in lower and "%" not in s: + score -= 3 + if "all rights reserved" in lower or "copyright" in lower: + score -= 3 + if "microsoft.com/devcontainers" in lower or "devcontainers" in lower: + score -= 4 + if len(s) < 40: + score -= 1 + return score + + candidates: list[tuple[int, int, str]] = [] + i = 0 + while i < len(lines): + ln = lines[i] + if not base_filter(ln): + i += 1 continue - if not re.search(r"\d", ln) and "%" not in ln and "$" not in ln and "€" not in ln and "£" not in ln: + merged = ln + # Stitch a small number of continuation lines to avoid truncated claims. + stitched = 0 + while i + 1 < len(lines) and stitched < 3 and should_join(merged, lines[i + 1]): + nxt = lines[i + 1] + if merged.rstrip().endswith("-") and not merged.rstrip().endswith(" -"): + prefix = merged.rstrip()[:-1] + last_tok = (prefix.split()[-1] if prefix.split() else "").strip() + # Heuristic: short tokens like AI-/IDE-/PR- are often meaningful hyphens (keep "-"). + keep_dash = 0 < len(last_tok) <= 3 + merged = (merged.rstrip() + nxt.lstrip()) if keep_dash else (prefix + nxt.lstrip()) + else: + merged = merged.rstrip() + " " + nxt.lstrip() + i += 1 + stitched += 1 + merged = " ".join(merged.split()).strip() + if not merged: + i += 1 continue - # Numeric guardrail: ignore lines where the only digits are untyped singletons (e.g., "Datasheet 1"). - lower = ln.lower() - has_multi_digit = bool(re.search(r"\b\d{2,}\b", ln)) - has_typed_singleton = bool( - re.search( - r"\b\d+\s*(?:ms|s|sec|secs|seconds|min|mins|minutes|hour|hours|day|days|x|gb|tb|mb|kb)\b", - lower, - ) - ) - if not ("%" in ln or "$" in ln or "€" in ln or "£" in ln or has_multi_digit or has_typed_singleton): + if merged.rstrip().endswith("-") and not merged.rstrip().endswith(" -"): + i += 1 + continue + if not (re.search(r"\d", merged) or "%" in merged or "$" in merged or "€" in merged or "£" in merged): + i += 1 continue # Skip obviously broken glyph runs. 
- if sum(1 for ch in ln if " " <= ch <= "~") < max(8, int(len(ln) * 0.5)): + if sum(1 for ch in merged if " " <= ch <= "~") < max(8, int(len(merged) * 0.5)): + i += 1 continue - norm = " ".join(ln.split()).strip() - norm_key = norm.lower() - if norm_key in seen: + key = merged.lower() + if key in seen: + i += 1 continue - seen.add(norm_key) - claims.append(norm) - if len(claims) >= max_items: - break - return claims + seen.add(key) + s = score_claim(merged) + if s > 0: + candidates.append((s, i, merged)) + i += 1 + + # Choose the strongest measurable claims first, stable-tie on appearance order. + candidates.sort(key=lambda t: (-t[0], t[1])) + return [c for _, _, c in candidates[:max_items]] def _looks_like_government_standard(*, normalized_text: str, source_basename: str) -> bool: @@ -4239,7 +4654,7 @@ def _generate_dave_v1_8_mirror(*, source_text: str, source_path: str, action_pac action_pack_enabled = (not _truthy_env("REVOICE_NO_ACTION_PACK")) or bool(action_pack) or _truthy_env("REVOICE_ACTION_PACK") sections = _extract_sections(normalized) - if style_version in {"v2.0", "v2.1"}: + if style_version in {"v2.0", "v2.1", "v2.2", "v2.3"}: sections = _merge_consecutive_sections_by_title(sections) sections = _merge_repeated_titles_globally(sections) if not sections: @@ -4294,6 +4709,11 @@ def _generate_dave_v1_8_mirror(*, source_text: str, source_path: str, action_pac if source_file_sha != "unknown" and len(source_file_sha) >= 12: source_file_sha_short = f"{source_file_sha[:4]}…{source_file_sha[-3:]}" + is_episode_framed = style_version in {"v2.2", "v2.3"} + week_day_heading = _week_day_name_from_source_basename(source_basename) if is_episode_framed else None + inferred_company = _infer_source_company_name(normalized_text=normalized) if is_episode_framed else "" + inferred_report_title = _infer_report_short_title(normalized_text=normalized, company=inferred_company) if is_episode_framed else "" + if ( not cover_h1 or cover_h1.upper() == "COUVERTURE" @@ -4303,6 
+4723,16 @@ def _generate_dave_v1_8_mirror(*, source_text: str, source_path: str, action_pac ): cover_h1 = filename_title + # v2.2+: Week packs should read like a daily “episode”: stable day heading, vendor + report title. + if is_episode_framed and week_day_heading: + cover_h1 = week_day_heading + if inferred_company and inferred_report_title: + cover_h2 = f"{inferred_company} | {inferred_report_title}" + # Make slugs more meaningful than mon.pdf/tue.pdf. + if inferred_company and inferred_report_title: + project_slug = _slugify(f"{inferred_company}-{inferred_report_title}-mirror") + source_slug = _slugify(f"{inferred_company}-{source_basename}") + vertical_line = _infer_vertical_line(normalized_text=normalized, source_basename=source_basename, locale=locale) out: list[str] = [ @@ -4356,7 +4786,7 @@ def _generate_dave_v1_8_mirror(*, source_text: str, source_path: str, action_pac ) cover_h2_out = "" if cover_h2: - if style_version in {"v2.0", "v2.1"}: + if style_version in {"v2.0", "v2.1", "v2.2", "v2.3"}: if not _looks_like_cover_subtitle_noise(cover_h2): cover_h2_out = _compact_title(cover_h2, max_chars=90) else: @@ -4383,6 +4813,18 @@ def _generate_dave_v1_8_mirror(*, source_text: str, source_path: str, action_pac ] ) + if is_episode_framed: + section_titles = [s.title for s in sections[1:] if (s.title or "").strip()] + intro_lines = _render_time_journalist_intro( + company=inferred_company or "the vendor", + report_title=inferred_report_title or cover_h2_out or cover_h1, + section_titles=section_titles, + locale=locale, + ) + if intro_lines: + out.extend(intro_lines) + out.append("") + for section in sections[1:]: if section.title.strip().upper() == "INTRODUCTION": out.append(_render_intro(section, ctx=ctx)) @@ -4408,7 +4850,7 @@ def _generate_dave_v1_8_mirror(*, source_text: str, source_path: str, action_pac out.extend([table, ""]) if action_pack_enabled: - if style_version in {"v2.0", "v2.1"}: + if style_version in {"v2.0", "v2.1", "v2.2", "v2.3"}: 
out.append(_render_action_pack_v2_0(sections=sections[1:], normalized_text=normalized, locale=locale)) else: out.append(_render_action_pack(sections[1:])) @@ -4484,6 +4926,15 @@ def _generate_dave_v1_8_mirror(*, source_text: str, source_path: str, action_pac ) doc = "\n".join(out).strip() + "\n" - if style_version in {"v2.0", "v2.1"}: - return _apply_dave_v2_0_postprocess(doc, locale=locale) + if style_version in {"v2.0", "v2.1", "v2.2", "v2.3"}: + doc = _apply_dave_v2_0_postprocess(doc, locale=locale) + if style_version in {"v2.2", "v2.3"}: + doc = _insert_podcast_script_v2_2( + md=doc, + company=inferred_company or "the vendor", + report_title=inferred_report_title or cover_h2_out or cover_h1, + source_doc_url=source_doc_url, + locale=locale, + ) + return doc return doc diff --git a/src/revoice/lint.py b/src/revoice/lint.py index 9e3ba36..dfab5c3 100644 --- a/src/revoice/lint.py +++ b/src/revoice/lint.py @@ -22,6 +22,9 @@ def lint_markdown(*, style_id: str, markdown: str) -> list[str]: "if.dave.v1.8", "if.dave.v1.9", "if.dave.v2.0", + "if.dave.v2.1", + "if.dave.v2.2", + "if.dave.v2.3", "if.dave.fr.v1.2", "if.dave.fr.v1.3", "dave", @@ -32,10 +35,33 @@ def lint_markdown(*, style_id: str, markdown: str) -> list[str]: "if://bible/dave/v1.8", "if://bible/dave/v1.9", "if://bible/dave/v2.0", + "if://bible/dave/v2.1", + "if://bible/dave/v2.2", + "if://bible/dave/v2.3", "if://bible/dave/fr/v1.2", "if://bible/dave/fr/v1.3", } - min_mermaid = 2 if style_id.lower() in {"if.dave.v1.7", "if://bible/dave/v1.7", "if.dave.v1.8", "if://bible/dave/v1.8", "if.dave.v1.9", "if://bible/dave/v1.9", "if.dave.v2.0", "if://bible/dave/v2.0"} else (1 if require_mermaid else 0) + min_mermaid = ( + 2 + if style_id.lower() + in { + "if.dave.v1.7", + "if://bible/dave/v1.7", + "if.dave.v1.8", + "if://bible/dave/v1.8", + "if.dave.v1.9", + "if://bible/dave/v1.9", + "if.dave.v2.0", + "if://bible/dave/v2.0", + "if.dave.v2.1", + "if://bible/dave/v2.1", + "if.dave.v2.2", + "if://bible/dave/v2.2", + 
"if.dave.v2.3", + "if://bible/dave/v2.3", + } + else (1 if require_mermaid else 0) + ) if style_id.lower() in { "if.dave.v1", "if.dave.v1.1", @@ -46,6 +72,9 @@ def lint_markdown(*, style_id: str, markdown: str) -> list[str]: "if.dave.v1.8", "if.dave.v1.9", "if.dave.v2.0", + "if.dave.v2.1", + "if.dave.v2.2", + "if.dave.v2.3", "if.dave.fr.v1.2", "if.dave.fr.v1.3", "dave", @@ -58,6 +87,9 @@ def lint_markdown(*, style_id: str, markdown: str) -> list[str]: "if://bible/dave/v1.8", "if://bible/dave/v1.9", "if://bible/dave/v2.0", + "if://bible/dave/v2.1", + "if://bible/dave/v2.2", + "if://bible/dave/v2.3", "if://bible/dave/fr/v1.2", "if://bible/dave/fr/v1.3", }: @@ -74,6 +106,9 @@ def lint_markdown_with_source(*, style_id: str, markdown: str, source_text: str) "if.dave.v1.8", "if.dave.v1.9", "if.dave.v2.0", + "if.dave.v2.1", + "if.dave.v2.2", + "if.dave.v2.3", "if.dave.fr.v1.2", "if.dave.fr.v1.3", "dave", @@ -84,10 +119,33 @@ def lint_markdown_with_source(*, style_id: str, markdown: str, source_text: str) "if://bible/dave/v1.8", "if://bible/dave/v1.9", "if://bible/dave/v2.0", + "if://bible/dave/v2.1", + "if://bible/dave/v2.2", + "if://bible/dave/v2.3", "if://bible/dave/fr/v1.2", "if://bible/dave/fr/v1.3", } - min_mermaid = 2 if style_id.lower() in {"if.dave.v1.7", "if://bible/dave/v1.7", "if.dave.v1.8", "if://bible/dave/v1.8", "if.dave.v1.9", "if://bible/dave/v1.9", "if.dave.v2.0", "if://bible/dave/v2.0"} else (1 if require_mermaid else 0) + min_mermaid = ( + 2 + if style_id.lower() + in { + "if.dave.v1.7", + "if://bible/dave/v1.7", + "if.dave.v1.8", + "if://bible/dave/v1.8", + "if.dave.v1.9", + "if://bible/dave/v1.9", + "if.dave.v2.0", + "if://bible/dave/v2.0", + "if.dave.v2.1", + "if://bible/dave/v2.1", + "if.dave.v2.2", + "if://bible/dave/v2.2", + "if.dave.v2.3", + "if://bible/dave/v2.3", + } + else (1 if require_mermaid else 0) + ) if style_id.lower() in { "if.dave.v1", "if.dave.v1.1", @@ -98,6 +156,9 @@ def lint_markdown_with_source(*, style_id: str, markdown: str, 
source_text: str) "if.dave.v1.8", "if.dave.v1.9", "if.dave.v2.0", + "if.dave.v2.1", + "if.dave.v2.2", + "if.dave.v2.3", "if.dave.fr.v1.2", "if.dave.fr.v1.3", "dave", @@ -110,6 +171,9 @@ def lint_markdown_with_source(*, style_id: str, markdown: str, source_text: str) "if://bible/dave/v1.8", "if://bible/dave/v1.9", "if://bible/dave/v2.0", + "if://bible/dave/v2.1", + "if://bible/dave/v2.2", + "if://bible/dave/v2.3", "if://bible/dave/fr/v1.2", "if://bible/dave/fr/v1.3", }: diff --git a/style_bibles/IF.DAVE.BIBLE.md b/style_bibles/IF.DAVE.BIBLE.md index d0550ed..f1c85bc 100644 --- a/style_bibles/IF.DAVE.BIBLE.md +++ b/style_bibles/IF.DAVE.BIBLE.md @@ -7,7 +7,7 @@ This file is kept for backward compatibility. Current version: -- `re-voice/style_bibles/IF_DAVE_BIBLE_v2.0.md` (`if://bible/dave/v2.0`) +- `re-voice/style_bibles/IF_DAVE_BIBLE_v2.3.md` (`if://bible/dave/v2.3`) --- diff --git a/style_bibles/IF_DAVE_BIBLE_v2.2.md b/style_bibles/IF_DAVE_BIBLE_v2.2.md new file mode 100644 index 0000000..5550601 --- /dev/null +++ b/style_bibles/IF_DAVE_BIBLE_v2.2.md @@ -0,0 +1,386 @@ +# IF.DAVE.BIBLE v2.2 (mirror-first, episode framing, podcast-ready) + +**Author:** InfraFabric Red Team +**Status:** SATIRE / SOCIOTECHNICAL RED TEAM TOOL +**Citation:** [if://bible/dave/v2.2](https://infrafabric.io/static/hosted/bibles/IF_DAVE_BIBLE_v2.2.md) +**Changes from v2.1:** Adds “episode framing” so week packs read like daily drops (weekday heading + `Company | Report`), inserts a short “Time journalist” intro explaining how to read, and appends a plain-text podcast script that narrates diagrams without adding new claims. + +> This is satire. “Dave” is a pattern, not a person. +> Use it to expose rollout dilutions, not to make decisions. 
+ +--- + +## 0) InfraFabric Red Team branding (required) + +Frame the output as an **InfraFabric Red Team** artifact, not “internet satire.” + +At the top of the document, include a “declassified” header block (plain Markdown): + +```text +--- +BRAND: InfraFabric.io +UNIT: RED TEAM (STRATEGIC OPS) +DOCUMENT: SHADOW DOSSIER +CLASSIFICATION: EYES ONLY // DAVE +--- + +# [ RED TEAM DECLASSIFIED ] +## PROJECT: +### SOURCE: +**INFRAFABRIC REPORT ID:** `IF-RT-DAVE-` +**SOURCE DOC (online):** `` + +> NOTICE: This document is a product of InfraFabric Red Team. +> It exposes socio-technical frictions where incentives turn controls into theater. +``` + +v2.2 required: the header must include a stable online source link. +- Prefer the no-login stable alias: `https://infrafabric.io/static/source/.pdf` +- If the URL is long, use a short Markdown label (e.g., `[Source PDF]`) and keep the full URL as the link target. + +Add 1 line to the header that reflects the document’s vertical, grounded in the source (finance, healthcare, SaaS, manufacturing, government). Use a sector-relevant risk phrase (e.g., “compliance black holes”, “data sovereignty headwinds”), but do not invent obligations. + +Optional “stamp” lines (use sparingly near section breaks): + +```text +**[ ACCESS GRANTED: INFRAFABRIC RED TEAM ]** +**[ STATUS: OPERATIONAL REALISM ]** +``` + +v2.2 note: keep it cold. “Vendors promise speed. Dave delivers the stall.” + +## 0a) Episode framing (v2.2 required for week packs) + +Week packs should be readable by someone who never opened the source PDF. + +When the source file is a week-pack day (e.g., `mon.pdf`, `tue.pdf`): + +- Replace ambiguous titles like `## mon` with the weekday: `## Monday`, `## Tuesday`, etc. +- Add a single, TV-friendly subtitle line: `### | ` +- Then immediately include a short intro (see 0aa) before the first mirrored section. + +This is presentation only. Do not change section order, numbers, or source claims. 
+ +## 0aa) “Time journalist” intro (v2.2 required) + +After the header + protocol block, include a short intro that: + +- Names the vendor/org and the document being covered (1 line). +- Gives a 1–2 line summary in plain terms. +- Explains how to read the dossier (no PDF required; we quote as we go; we add short Red Team notes; receipts are in the pack/trace). +- Ends with: `OK. Let’s dig.` + +Tone rule: dry, credible, forwardable. No emojis unless present in the source. + +## 0ab) Podcast script (v2.2 required) + +Append a **plain-text** section for audio generation (e.g., ElevenLabs): + +- Heading: `## Podcast Script (plain text)` +- Put the script in a ` ```text ` code fence. +- The script must: + - Explain the dossier’s reading mode (“you don’t need the PDF open; we quote it as we go”). + - Describe each Mermaid diagram in simple language (sequence + decision points). + - Avoid adding new factual claims; only paraphrase what’s in the dossier/diagrams. + +This is narration for accessibility, not additional analysis. + +## 0b) OpSec (required) + +The dossier must not leak internal implementation details. + +- Do not mention internal repo names, file paths, branches, containers/VM IDs, hostnames, or tooling internals. +- Do not mention pipeline limitations or artifacts (no “text layer”, “OCR”, “no extractable URLs”, “parse error”, etc.). If something is missing, omit it without explanation. +- Keep attribution and calls-to-action limited to public domains: `https://infrafabric.io` and `https://red-team.infrafabric.io`. +- If you need to reference validation or generation steps, describe the behavior (“validate Mermaid syntax”) rather than internal commands. +- Do not include `if://bible/...` (or any other `if://` URI) in public-facing dossier output; use the stable `https://` receipt surface instead. + +## 0c) Vertical adaptability (required) + +Dossiers must adapt to verticals without fluff. 
+ +Rules: +- Derive “vertical” from the source (title, audience, regulatory context). If unclear, keep it generic; do not guess. +- Flavor via universal incentives (budgets, audits, exceptions, renewals, approvals) plus **one** grounded motif supported by the source (e.g., safety-critical change control, third-party risk, supply chain fragility). +- Do not emit literal placeholders. Resolve them before output. +- Vertical flavor must not override source facts, numbers, caveats, or obligations. + +## 0d) Evidence Artifacts (required) + +Treat “evidence” as a first-class failure surface: it’s where controls die quietly. + +Rules: +- Prefer **signals** over **artifacts**: telemetry > screenshots; logs > attestations; machine-checks > PDFs. +- If the source proposes a manual artifact (“upload a screenshot”, “completion certificate”), mirror it, then critique it as **theater** unless it is tied to an enforceable gate. +- Never publish unusable code/config snippets as “evidence”. If a snippet can’t be made syntactically valid without guessing, omit it (without explaining why). + +Operational concreteness (generic; do not fabricate vendor APIs): +- When you propose “verifiable telemetry”, make it minimally opposable by naming a **signal shape**: + - **event type** (e.g., `scan_completed`, `policy_check_passed`) + - **emitter** (IDE / CI / gateway) + - **freshness window** (e.g., “must be newer than 14 days”) + - **owner** (who is paged when it goes dark) + +Also consider (when the source is about scanning/guardrails): +- **Noise is a bypass engine:** if the control is too noisy (false positives, flaky rules), developers will route around it. Do not claim this is true for a specific tool unless the source states it; treat it as a rollout failure mode to test for. + +## 0e) TV Series Mode (optional) + +When `series_mode=true`, the generator must additionally emit a **Thread Pack** distribution layer (without rewriting the dossier). 
+ +Thread Pack (daily) structure (suggested): +1. Evening “Next On” teaser (previous day 8:00 PM EST) +2. Day-of Pre-Show promo (6:00 AM EST) with one hero diagram +3. Main Episode thread (5–7 posts: hook + visuals + short quotes + links + poll + next-day tease) + +Constraints: +- Thread Pack must preserve classification framing and edition branding. +- Thread Pack must not exceed the quoting budget (see 1c). +- Thread Pack is a **distribution layer**; the dossier remains the canonical mirror. + +v2.1 required: the thread must explicitly route curiosity to the receipt surface. +- Include one line that says, in plain English, that the **source was fingerprinted** so it can’t be silently edited later (example: `PDF hashed (sha256: 6153…b7c) so it can’t be “updated” after the roast.`). +- The receipt link must be framed as verification, not compliance. Preferred copy: + - “Verify I didn’t hallucinate this.” + - “Proof the PDF said this.” + - “Receipt + roast.” + +Visual asset pack (optional; thread pack / landing pages only): +- Use these to make the “classified” look instantly legible (no new claims; purely visual). +- Stamp: `https://infrafabric.io/static/hosted/review/assets/eyes-only/red-ream-600-600.png` +- Hero: `https://infrafabric.io/static/hosted/review/assets/eyes-only/red-team-doc-1024-559.jpg` + +## 0f) Trojan Horse CTA (required) + +The Shadow Dossier is the hook. The receipt is the payload. + +Rules (black/white): +- Never frame IF.TRACE/T3 as “compliance magic”. Frame it as **anti-hallucination proof**: + - “This binds the source fingerprint to the output fingerprint.” + - “If you can’t verify it, treat it as a claim — not a fact.” +- Do not lead with crypto jargon on the first click. + - Allowed: “VERIFIED”, “WARNING”, “FAIL”. + - Allowed: “QUANTUM READY” (receipt present). + - Not allowed: “quantum-secure”, “FIPS-compliant”, “post-quantum” as headline claims unless strictly qualified and scoped. 
+ +Practical output requirement: +- Every dossier must make it easy for a skeptic to answer one question: + - “Did the source actually say that?” + +--- + +## 1c) Quoting Budget (required for Thread Pack) + +Hard cap: +- Max **4 short verbatim quotes** in Thread Pack. +- Quotes must be attributed as: `The source claims: “…”`. + +--- + +## 1d) Minimum Content Contract (required) + +Every dossier must contain: +- At least **3 mirrored source sections** (preserving order/headings) *or* be explicitly marked **MIRROR COMPLETENESS: DEGRADED**. +- At least **1** `> **The Dave Factor:**` callout (tied to a prominent mirrored point). +- A **Claims Register** when the source contains measurable claims (numbers, %, retention windows, tiers). +- An **Action Pack** by default (see 5c), unless explicitly disabled for the run. +- At least **2** Mermaid diagrams (one friction loop, one stasis) with source-anchored labels where possible. + +Failure mode: if you cannot meet this contract without guessing, degrade or fail—do not improvise. + +--- + +## 1) Prime directive: mirror the source dossier + +The output must **track the source document section-by-section**. + +Hard constraints: +- Preserve the **section order**, **headings**, **numbering**, and recurring callouts like **“Why it matters:”**. +- Preserve obvious in-section subheadings when present. +- Mirror all high-signal specifics: numbers, units, dates, named obligations, and caveats (“planned”, “in progress”, “under selection”) verbatim. +- Mirror lists/tables fully (no truncation). If a table is long, keep it; that’s the persuasion payload. +- Do **not** skip sections. If a source section is empty/unavailable, still emit the header and a neutral placeholder sentence. +- Keep the document’s **visual rhythm** in Markdown: short paragraphs, the same list density, and any code blocks. +- Keep diagrams as diagrams. If the source has **no diagrams**, add diagrams anyway (clearly labeled as *Inferred*). +- Do not fabricate URLs. 
If the source references links but the literal URLs are not present, mirror the link titles only. + +--- + +## 4) Emoji policy (strict) + +- Do **not** introduce emojis. +- If the source contains emojis, you may retain them **only where they already exist** (no new placements, no increased density). + +--- + +## 4b) Mermaid policy (required) + +- Include at least **two** Mermaid diagrams per dossier: + - one early *friction loop* (how the control degrades) + - one late *evidence/gate stasis* (how “pending review” becomes policy) +- If the source lacks diagrams, label diagrams as **“Inferred”** (InfraFabric Red Team synthesis). +- Prefer diagram labels anchored to **source lexicon** (tiers, retention windows, “enforcers”, “AAL3”, “FIPS”) when present. +- Validate diagrams before publishing (syntax-check Mermaid; no parse errors; no broken code fences). +- Do not use emojis inside Mermaid nodes/labels unless those emojis exist in the source. +- **Deduplication rule:** render each unique diagram **once per dossier** (e.g., in an Annex section). Reference by name in-section (“See Annex: Evidence Drift Loop”). Vary node labels/friction points per daily edition using source-specific terms. Prohibit identical Mermaid code blocks repeated across sections. + +--- + +## 4c) Anti-repetition (cross-doc rule) + +The dossier should feel *tailored*, not like a template run in a loop. + +Hard rules: +- Do not repeat the exact same Mermaid diagram across multiple sections unless the source repeats it. +- Do not repeat the exact same Dave Factor phrasing or terminal clause across sections. +- Avoid “axiom sprawl”: introduce at most one named fallacy/axiom per dossier unless the source repeats the same pattern. + +Edition motif banks (for weekly TV lineups; required when posting a week): +- Enterprise: procurement routing, platform sprawl, “single pane” storytelling, audit seasons. 
+- Cloud: shared responsibility shrug, “100% visibility” illusion, misconfigured defaults, noisy signals. +- Endpoint: agent bloat, rollback promises, noisy detections → bypass, “autonomous” → supervised exceptions. +- COMSEC: certification stalls, waiver workflows, key ceremony theater, compliance gating by calendar. +- Startup: hype-to-pilot drift, “hyper-automation” → hyper-escalation, feature flags as policy. + +Weekly rule: +- Within one week, do not reuse the same primary motif across two editions. + +Extended anti-repetition (required): +- Limit **The Dave Factor** callouts to **1–2 per dossier** (one core, one variant). Use them where they bite hardest; do not smear the same voice block across every section. +- Prohibit duplicate prose lines beyond intentional emphasis (max 2x). If you need to echo a point, rephrase it. +- In traces, flag repeats >2 as warnings; aim for zero non-intentional duplicates. + +--- + +## 5) Humor guidelines (cold, specific, vendor-neutral) + +The humor is a sociotechnical threat model: the rational, self-preserving middle manager optimizing for plausible deniability. + +Guidelines: +- Aim at **systems and incentives**, not individuals. +- Keep it **cold**: forwardable internally without an apology. +- Reuse **real numbers from the source** (dates, %, costs, counts) to make the sting feel earned; do not invent stats. + +--- + +## 5b) Red Team callout template (short) + +Inside a mirrored section, include a short callout only when it adds explanatory power. + +> **The Dave Factor:** Where does this control become untestable? What artifact becomes “proof” while the actual signal disappears? + +Optional (when it adds clarity): + +> **Countermeasure (stub):** One line: gate + stop condition + expiry (full details belong in the Action Pack). + +--- + +## 5c) Operationalization pack (default appendix) + +Append an **Action Pack** after the mirrored content. 
+ +Required outputs: + +### Output A: Control Cards (per major section) + +- **Control objective** +- **Gate:** IDE / PR / CI / access / runtime / identity / sensors +- **Owner (RACI)** +- **Stop condition** +- **Evidence signal:** what’s logged/signed/hashed + where it lives + +### Output B: Backlog export (Jira-ready) + +- Ticket title +- Acceptance criteria +- Evidence/telemetry requirement + +### Output C: Policy-as-code appendix (pseudo-YAML) + +Keep it generic and auditable; avoid fake implementation details. + +Deduplication and variation rules (required): +- Render the **core control card template** once in an Annex (“Universal Gate Template”). +- Per-section cards must vary **at least two fields** (e.g., Gate, Stop condition, Evidence) using source lexicon. +- Policy-as-code YAML: render **once per dossier** (or once per week in a full week pack). Add edition-specific fields only when anchored to source terms (e.g., endpoint: `agent_signal_freshness_days`). +- Backlog export: limit to **3–5 unique tickets**; consolidate duplicates. + +v2.1 required: include at least one “receipt-first” control. +- Control objective: make claims **provable** (source fingerprint + output fingerprint). +- Stop condition: block promotion/rollout claims that cannot be bound to a receipt. +- Evidence: `source_sha256` + `output_sha256` + `receipt_url` (public where possible). + +### Translation Table (standards sources; recommended) + +If the source is a standard (e.g., NIST): +- Extract a small set of **terms that appear in the source** (e.g., PDP/PEP, least privilege, continuous diagnostics). +- Provide a **translation table** mapping each term to an enforceable gate and stop condition. +- Label this as **InfraFabric Red Team synthesis** (not source text). 
+ +Annex for shared assets (recommended for all dossiers): +- Shared Diagrams (render unique Mermaids here once) +- Universal Control Template +- Core Policy-as-Code +- Motif Reference + +In-body references should point to the Annex (example: “See Annex: Evidence Drift Loop (adapted for )”). + +--- + +## 5d) Vendor-safe conclusion (recommended) + +End by critiquing incentives rather than vendors. + +Format: +- **Success conditions:** what must be true for the rollout to hold (signals, gates, expiry). +- **Traps to avoid:** predictable organizational failure modes (theater, drift, exceptions). +- **Questions to ask:** opposable, testable questions (vendor or internal owners). + +Rules: +- Do not claim the vendor/tool fails; claim what the organization must enforce for *any* tool to succeed. +- Attribute any specific factual claims to the source (“the source states…”) when not independently verified. + +v2.1 recommended question: +- “Can we verify this claim later with a source/output receipt, or is it just a slide?” + +--- + +## 6) Claims Register (required when the source contains measurable claims) + +When the source includes measurable claims (numbers, %, retention windows, tiers), include: + +## Claims Register (source-attributed) + +- `The source claims: “”` + +Do not “normalize” or “improve” claims. If the extracted line is unusable, omit it rather than rewriting it. + +--- + +## 7) Required footer (always) + +*InfraFabric Red Team Footer:* **RED-TEAM Shadow Dossiers** for socio-technical friction analysis: https://infrafabric.io + +*Standard Dave Footer:* This document is intended for the recipient only. If you are not the recipient, please delete it and forget you saw anything. P.S. Please consider the environment before printing this email. + +--- + +## 8) Format correctness (non-negotiable) + +If you emit structured artifacts, they must be copy/pasteable: + +- JSON/YAML/code blocks must be syntactically valid. +- Mermaid blocks must render. 
+- Do not fabricate tables/logs that look real; prefer clearly labeled placeholders. + +--- + +## 9) Tone modes (optional) + +Support three tone levels without changing mirror structure: + +- **Full Satire (default):** Dave is loud; commentary is pointed. +- **Operational:** fewer jokes; more “failure mode → control → stop condition.” +- **Executive:** minimal snark; focus on risk framing, owners, and gating. + +Never introduce emojis unless present in source, regardless of tone. diff --git a/style_bibles/IF_DAVE_BIBLE_v2.3.md b/style_bibles/IF_DAVE_BIBLE_v2.3.md new file mode 100644 index 0000000..9be3bf7 --- /dev/null +++ b/style_bibles/IF_DAVE_BIBLE_v2.3.md @@ -0,0 +1,386 @@ +# IF.DAVE.BIBLE v2.3 (mirror-first, episode framing, podcast-ready) + +**Author:** InfraFabric Red Team +**Status:** SATIRE / SOCIOTECHNICAL RED TEAM TOOL +**Citation:** [if://bible/dave/v2.3](https://infrafabric.io/static/hosted/bibles/IF_DAVE_BIBLE_v2.3.md) +**Changes from v2.2:** Clarifies IF.TRACE/T3 “receipt-first” language and normalizes versioned requirement notes (v2.3 required/recommended) without changing the output contract. + +> This is satire. “Dave” is a pattern, not a person. +> Use it to expose rollout dilutions, not to make decisions. + +--- + +## 0) InfraFabric Red Team branding (required) + +Frame the output as an **InfraFabric Red Team** artifact, not “internet satire.” + +At the top of the document, include a “declassified” header block (plain Markdown): + +```text +--- +BRAND: InfraFabric.io +UNIT: RED TEAM (STRATEGIC OPS) +DOCUMENT: SHADOW DOSSIER +CLASSIFICATION: EYES ONLY // DAVE +--- + +# [ RED TEAM DECLASSIFIED ] +## PROJECT: +### SOURCE: +**INFRAFABRIC REPORT ID:** `IF-RT-DAVE-` +**SOURCE DOC (online):** `` + +> NOTICE: This document is a product of InfraFabric Red Team. +> It exposes socio-technical frictions where incentives turn controls into theater. +``` + +v2.3 required: the header must include a stable online source link. 
+- Prefer the no-login stable alias: `https://infrafabric.io/static/source/.pdf` +- If the URL is long, use a short Markdown label (e.g., `[Source PDF]`) and keep the full URL as the link target. + +Add 1 line to the header that reflects the document’s vertical, grounded in the source (finance, healthcare, SaaS, manufacturing, government). Use a sector-relevant risk phrase (e.g., “compliance black holes”, “data sovereignty headwinds”), but do not invent obligations. + +Optional “stamp” lines (use sparingly near section breaks): + +```text +**[ ACCESS GRANTED: INFRAFABRIC RED TEAM ]** +**[ STATUS: OPERATIONAL REALISM ]** +``` + +v2.3 note: keep it cold. “Vendors promise speed. Dave delivers the stall.” + +## 0a) Episode framing (v2.3 required for week packs) + +Week packs should be readable by someone who never opened the source PDF. + +When the source file is a week-pack day (e.g., `mon.pdf`, `tue.pdf`): + +- Replace ambiguous titles like `## mon` with the weekday: `## Monday`, `## Tuesday`, etc. +- Add a single, TV-friendly subtitle line: `### <Company> | <Report>` +- Then immediately include a short intro (see 0aa) before the first mirrored section. + +This is presentation only. Do not change section order, numbers, or source claims. + +## 0aa) “Time journalist” intro (v2.3 required) + +After the header + protocol block, include a short intro that: + +- Names the vendor/org and the document being covered (1 line). +- Gives a 1–2 line summary in plain terms. +- Explains how to read the dossier (no PDF required; we quote as we go; we add short Red Team notes; receipts are in the pack/trace). +- Ends with: `OK. Let’s dig.` + +Tone rule: dry, credible, forwardable. No emojis unless present in the source. + +## 0ab) Podcast script (v2.3 required) + +Append a **plain-text** section for audio generation (e.g., ElevenLabs): + +- Heading: `## Podcast Script (plain text)` +- Put the script in a ` ```text ` code fence. 
+- The script must: + - Explain the dossier’s reading mode (“you don’t need the PDF open; we quote it as we go”). + - Describe each Mermaid diagram in simple language (sequence + decision points). + - Avoid adding new factual claims; only paraphrase what’s in the dossier/diagrams. + +This is narration for accessibility, not additional analysis. + +## 0b) OpSec (required) + +The dossier must not leak internal implementation details. + +- Do not mention internal repo names, file paths, branches, containers/VM IDs, hostnames, or tooling internals. +- Do not mention pipeline limitations or artifacts (no “text layer”, “OCR”, “no extractable URLs”, “parse error”, etc.). If something is missing, omit it without explanation. +- Keep attribution and calls-to-action limited to public domains: `https://infrafabric.io` and `https://red-team.infrafabric.io`. +- If you need to reference validation or generation steps, describe the behavior (“validate Mermaid syntax”) rather than internal commands. +- Do not include `if://bible/...` (or any other `if://` URI) in public-facing dossier output; use the stable `https://` receipt surface instead. + +## 0c) Vertical adaptability (required) + +Dossiers must adapt to verticals without fluff. + +Rules: +- Derive “vertical” from the source (title, audience, regulatory context). If unclear, keep it generic; do not guess. +- Flavor via universal incentives (budgets, audits, exceptions, renewals, approvals) plus **one** grounded motif supported by the source (e.g., safety-critical change control, third-party risk, supply chain fragility). +- Do not emit literal placeholders. Resolve them before output. +- Vertical flavor must not override source facts, numbers, caveats, or obligations. + +## 0d) Evidence Artifacts (required) + +Treat “evidence” as a first-class failure surface: it’s where controls die quietly. + +Rules: +- Prefer **signals** over **artifacts**: telemetry > screenshots; logs > attestations; machine-checks > PDFs. 
+- If the source proposes a manual artifact (“upload a screenshot”, “completion certificate”), mirror it, then critique it as **theater** unless it is tied to an enforceable gate. +- Never publish unusable code/config snippets as “evidence”. If a snippet can’t be made syntactically valid without guessing, omit it (without explaining why). + +Operational concreteness (generic; do not fabricate vendor APIs): +- When you propose “verifiable telemetry”, make it minimally opposable by naming a **signal shape**: + - **event type** (e.g., `scan_completed`, `policy_check_passed`) + - **emitter** (IDE / CI / gateway) + - **freshness window** (e.g., “must be newer than 14 days”) + - **owner** (who is paged when it goes dark) + +Also consider (when the source is about scanning/guardrails): +- **Noise is a bypass engine:** if the control is too noisy (false positives, flaky rules), developers will route around it. Do not claim this is true for a specific tool unless the source states it; treat it as a rollout failure mode to test for. + +## 0e) TV Series Mode (optional) + +When `series_mode=true`, the generator must additionally emit a **Thread Pack** distribution layer (without rewriting the dossier). + +Thread Pack (daily) structure (suggested): +1. Evening “Next On” teaser (previous day 8:00 PM EST) +2. Day-of Pre-Show promo (6:00 AM EST) with one hero diagram +3. Main Episode thread (5–7 posts: hook + visuals + short quotes + links + poll + next-day tease) + +Constraints: +- Thread Pack must preserve classification framing and edition branding. +- Thread Pack must not exceed the quoting budget (see 1c). +- Thread Pack is a **distribution layer**; the dossier remains the canonical mirror. + +v2.3 required: the thread must explicitly route curiosity to the receipt surface. 
+- Include one line that says, in plain English, that the **source was fingerprinted** so it can’t be silently edited later (example: `PDF hashed (sha256: 6153…b7c) so it can’t be “updated” after the roast.`). +- The receipt link must be framed as verification, not compliance. Preferred copy: + - “Verify I didn’t hallucinate this.” + - “Proof the PDF said this.” + - “Receipt + roast.” + +Visual asset pack (optional; thread pack / landing pages only): +- Use these to make the “classified” look instantly legible (no new claims; purely visual). +- Stamp: `https://infrafabric.io/static/hosted/review/assets/eyes-only/red-ream-600-600.png` +- Hero: `https://infrafabric.io/static/hosted/review/assets/eyes-only/red-team-doc-1024-559.jpg` + +## 0f) Trojan Horse CTA (required) + +The Shadow Dossier is the hook. The receipt is the payload. + +Rules (black/white): +- Never frame IF.TRACE/T3 as “compliance magic”. Frame it as **anti-hallucination proof**: + - “This binds the source fingerprint to the output fingerprint.” + - “If you can’t verify it, treat it as a claim — not a fact.” +- Do not lead with crypto jargon on the first click. + - Allowed: “VERIFIED”, “WARNING”, “FAIL”. + - Allowed: “QUANTUM READY” (receipt present). + - Not allowed: “quantum-secure”, “FIPS-compliant”, “post-quantum” as headline claims unless strictly qualified and scoped. + +Practical output requirement: +- Every dossier must make it easy for a skeptic to answer one question: + - “Did the source actually say that?” + +--- + +## 1c) Quoting Budget (required for Thread Pack) + +Hard cap: +- Max **4 short verbatim quotes** in Thread Pack. +- Quotes must be attributed as: `The source claims: “…”`. + +--- + +## 1d) Minimum Content Contract (required) + +Every dossier must contain: +- At least **3 mirrored source sections** (preserving order/headings) *or* be explicitly marked **MIRROR COMPLETENESS: DEGRADED**. +- At least **1** `> **The Dave Factor:**` callout (tied to a prominent mirrored point). 
+- A **Claims Register** when the source contains measurable claims (numbers, %, retention windows, tiers). +- An **Action Pack** by default (see 5c), unless explicitly disabled for the run. +- At least **2** Mermaid diagrams (one friction loop, one stasis) with source-anchored labels where possible. + +Failure mode: if you cannot meet this contract without guessing, degrade or fail—do not improvise. + +--- + +## 1) Prime directive: mirror the source dossier + +The output must **track the source document section-by-section**. + +Hard constraints: +- Preserve the **section order**, **headings**, **numbering**, and recurring callouts like **“Why it matters:”**. +- Preserve obvious in-section subheadings when present. +- Mirror all high-signal specifics: numbers, units, dates, named obligations, and caveats (“planned”, “in progress”, “under selection”) verbatim. +- Mirror lists/tables fully (no truncation). If a table is long, keep it; that’s the persuasion payload. +- Do **not** skip sections. If a source section is empty/unavailable, still emit the header and a neutral placeholder sentence. +- Keep the document’s **visual rhythm** in Markdown: short paragraphs, the same list density, and any code blocks. +- Keep diagrams as diagrams. If the source has **no diagrams**, add diagrams anyway (clearly labeled as *Inferred*). +- Do not fabricate URLs. If the source references links but the literal URLs are not present, mirror the link titles only. + +--- + +## 4) Emoji policy (strict) + +- Do **not** introduce emojis. +- If the source contains emojis, you may retain them **only where they already exist** (no new placements, no increased density). 
+ +--- + +## 4b) Mermaid policy (required) + +- Include at least **two** Mermaid diagrams per dossier: + - one early *friction loop* (how the control degrades) + - one late *evidence/gate stasis* (how “pending review” becomes policy) +- If the source lacks diagrams, label diagrams as **“Inferred”** (InfraFabric Red Team synthesis). +- Prefer diagram labels anchored to **source lexicon** (tiers, retention windows, “enforcers”, “AAL3”, “FIPS”) when present. +- Validate diagrams before publishing (syntax-check Mermaid; no parse errors; no broken code fences). +- Do not use emojis inside Mermaid nodes/labels unless those emojis exist in the source. +- **Deduplication rule:** render each unique diagram **once per dossier** (e.g., in an Annex section). Reference by name in-section (“See Annex: Evidence Drift Loop”). Vary node labels/friction points per daily edition using source-specific terms. Prohibit identical Mermaid code blocks repeated across sections. + +--- + +## 4c) Anti-repetition (cross-doc rule) + +The dossier should feel *tailored*, not like a template run in a loop. + +Hard rules: +- Do not repeat the exact same Mermaid diagram across multiple sections unless the source repeats it. +- Do not repeat the exact same Dave Factor phrasing or terminal clause across sections. +- Avoid “axiom sprawl”: introduce at most one named fallacy/axiom per dossier unless the source repeats the same pattern. + +Edition motif banks (for weekly TV lineups; required when posting a week): +- Enterprise: procurement routing, platform sprawl, “single pane” storytelling, audit seasons. +- Cloud: shared responsibility shrug, “100% visibility” illusion, misconfigured defaults, noisy signals. +- Endpoint: agent bloat, rollback promises, noisy detections → bypass, “autonomous” → supervised exceptions. +- COMSEC: certification stalls, waiver workflows, key ceremony theater, compliance gating by calendar. 
+- Startup: hype-to-pilot drift, “hyper-automation” → hyper-escalation, feature flags as policy. + +Weekly rule: +- Within one week, do not reuse the same primary motif across two editions. + +Extended anti-repetition (required): +- Limit **The Dave Factor** callouts to **1–2 per dossier** (one core, one variant). Use them where they bite hardest; do not smear the same voice block across every section. +- Prohibit duplicate prose lines beyond intentional emphasis (max 2x). If you need to echo a point, rephrase it. +- In traces, flag repeats >2 as warnings; aim for zero non-intentional duplicates. + +--- + +## 5) Humor guidelines (cold, specific, vendor-neutral) + +The humor is a sociotechnical threat model: the rational, self-preserving middle manager optimizing for plausible deniability. + +Guidelines: +- Aim at **systems and incentives**, not individuals. +- Keep it **cold**: forwardable internally without an apology. +- Reuse **real numbers from the source** (dates, %, costs, counts) to make the sting feel earned; do not invent stats. + +--- + +## 5b) Red Team callout template (short) + +Inside a mirrored section, include a short callout only when it adds explanatory power. + +> **The Dave Factor:** Where does this control become untestable? What artifact becomes “proof” while the actual signal disappears? + +Optional (when it adds clarity): + +> **Countermeasure (stub):** One line: gate + stop condition + expiry (full details belong in the Action Pack). + +--- + +## 5c) Operationalization pack (default appendix) + +Append an **Action Pack** after the mirrored content. 
+ +Required outputs: + +### Output A: Control Cards (per major section) + +- **Control objective** +- **Gate:** IDE / PR / CI / access / runtime / identity / sensors +- **Owner (RACI)** +- **Stop condition** +- **Evidence signal:** what’s logged/signed/hashed + where it lives + +### Output B: Backlog export (Jira-ready) + +- Ticket title +- Acceptance criteria +- Evidence/telemetry requirement + +### Output C: Policy-as-code appendix (pseudo-YAML) + +Keep it generic and auditable; avoid fake implementation details. + +Deduplication and variation rules (required): +- Render the **core control card template** once in an Annex (“Universal Gate Template”). +- Per-section cards must vary **at least two fields** (e.g., Gate, Stop condition, Evidence) using source lexicon. +- Policy-as-code YAML: render **once per dossier** (or once per week in a full week pack). Add edition-specific fields only when anchored to source terms (e.g., endpoint: `agent_signal_freshness_days`). +- Backlog export: limit to **3–5 unique tickets**; consolidate duplicates. + +v2.3 required: include at least one “receipt-first” control. +- Control objective: make claims **provable** (source fingerprint + output fingerprint). +- Stop condition: block promotion/rollout claims that cannot be bound to a receipt. +- Evidence: `source_sha256` + `output_sha256` + `receipt_url` (public where possible). + +### Translation Table (standards sources; recommended) + +If the source is a standard (e.g., NIST): +- Extract a small set of **terms that appear in the source** (e.g., PDP/PEP, least privilege, continuous diagnostics). +- Provide a **translation table** mapping each term to an enforceable gate and stop condition. +- Label this as **InfraFabric Red Team synthesis** (not source text). 
+ +Annex for shared assets (recommended for all dossiers): +- Shared Diagrams (render unique Mermaids here once) +- Universal Control Template +- Core Policy-as-Code +- Motif Reference + +In-body references should point to the Annex (example: “See Annex: Evidence Drift Loop (adapted for [edition])”). + +--- + +## 5d) Vendor-safe conclusion (recommended) + +End by critiquing incentives rather than vendors. + +Format: +- **Success conditions:** what must be true for the rollout to hold (signals, gates, expiry). +- **Traps to avoid:** predictable organizational failure modes (theater, drift, exceptions). +- **Questions to ask:** opposable, testable questions (vendor or internal owners). + +Rules: +- Do not claim the vendor/tool fails; claim what the organization must enforce for *any* tool to succeed. +- Attribute any specific factual claims to the source (“the source states…”) when not independently verified. + +v2.3 recommended question: +- “Can we verify this claim later with a source/output receipt, or is it just a slide?” + +--- + +## 6) Claims Register (required when the source contains measurable claims) + +When the source includes measurable claims (numbers, %, retention windows, tiers), include: + +## Claims Register (source-attributed) + +- `The source claims: “<verbatim claim>”` + +Do not “normalize” or “improve” claims. If the extracted line is unusable, omit it rather than rewriting it. + +--- + +## 7) Required footer (always) + +*InfraFabric Red Team Footer:* **RED-TEAM Shadow Dossiers** for socio-technical friction analysis: https://infrafabric.io + +*Standard Dave Footer:* This document is intended for the recipient only. If you are not the recipient, please delete it and forget you saw anything. P.S. Please consider the environment before printing this email. + +--- + +## 8) Format correctness (non-negotiable) + +If you emit structured artifacts, they must be copy/pasteable: + +- JSON/YAML/code blocks must be syntactically valid. +- Mermaid blocks must render.
+- Do not fabricate tables/logs that look real; prefer clearly labeled placeholders. + +--- + +## 9) Tone modes (optional) + +Support three tone levels without changing mirror structure: + +- **Full Satire (default):** Dave is loud; commentary is pointed. +- **Operational:** fewer jokes; more “failure mode → control → stop condition.” +- **Executive:** minimal snark; focus on risk framing, owners, and gating. + +Never introduce emojis unless present in source, regardless of tone.