Infrafabric-POC-docs/tools/build_dossier_editions.py

#!/usr/bin/env python3
from __future__ import annotations

import re
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class Rule:
    """Immutable record describing one heading-rewrite rule.

    NOTE(review): nothing in the visible part of this file constructs or
    reads ``Rule``; confirm whether it is still needed before extending it.
    """

    # Label for the rule; presumably for humans/logging only -- TODO confirm.
    name: str
    # Compiled pattern; presumably matched against heading text -- TODO confirm.
    match: re.Pattern[str]
    mode: str  # "drop_section" | "rename_heading"
    # Presumably the new heading text for "rename_heading" rules -- TODO confirm.
    # Defaults to None.
    replacement: str | None = None


# ATX-style Markdown heading: group 1 is the run of 1-6 "#" characters (its
# length is the heading level); group 2 is the rest of the line after the
# mandatory whitespace separator.
HEADING_RE = re.compile(r"^(#{1,6})\s+(.*)$")
# A line that starts with three backticks at column 0. Indented fences and
# "~~~" fences do not match this pattern.
CODE_FENCE_RE = re.compile(r"^```")


def _heading_level(line: str) -> int | None:
    """Return the heading depth of *line* (number of leading ``#``).

    Returns ``None`` when *line* does not match ``HEADING_RE``.
    """
    found = HEADING_RE.match(line)
    # Match objects are always truthy, so a plain truth test is enough.
    return len(found.group(1)) if found else None


def _is_heading(line: str) -> bool:
    """Tell whether *line* matches ``HEADING_RE`` (an ATX Markdown heading)."""
    return bool(HEADING_RE.match(line))


def _rename_scaffold_heading(text: str) -> str:
    # Convert "HOOK: The Question" -> "The Question"
    # This preserves the author's voice but removes visible scaffolding labels.
    scaffold = ("HOOK:", "FLAW:", "SETUP:", "TENSION:", "TWIST:", "PUNCH:")
    for token in scaffold:
        if text.strip().upper().startswith(token):
            return text.split(":", 1)[1].strip()
    # Remove other scaffold-y heading labels that read like writing instructions.
    if text.strip().lower().startswith("the punchline for your ciso"):
        # keep intent but remove the scaffold label
        return "Executive takeaway (security leadership)"
    return text


def rewrite_markdown(
    content: str,
    *,
    drop_headings: set[str],
    drop_heading_prefixes: tuple[str, ...] = (),
    remove_story_sections: bool = False,
) -> str:
    """Return *content* with selected sections removed and scaffold headings renamed.

    The text is processed line by line. Lines inside fenced code blocks
    (delimited by lines matching ``CODE_FENCE_RE``) are never interpreted as
    headings.

    A heading starts a dropped section when its stripped title

    * is a member of ``drop_headings``, or
    * starts with any entry of ``drop_heading_prefixes``, or
    * (only if ``remove_story_sections`` is true) starts with one of the
      built-in story/origin prefixes.

    A dropped section runs from its heading up to, but not including, the
    next heading of the same or a shallower level (or to the end of input).
    Everything inside it is discarded, including nested sub-headings.

    Surviving headings are passed through ``_rename_scaffold_heading``; when
    the title changes, the heading is re-emitted with the same level and the
    new title.

    Args:
        content: Markdown source text.
        drop_headings: Exact heading titles whose sections are removed.
        drop_heading_prefixes: Title prefixes whose sections are removed.
        remove_story_sections: Also remove the built-in story sections.

    Returns:
        The rewritten Markdown text.
    """
    # Built-in prefixes that mark story/origin sections.
    story_prefixes = (
        "IF.STORY | The Origin Story",
        "The Origin Story —",
        "Story ",
        "Cold Open —",
    )
    # str.startswith needs a real tuple; normalise once so that any iterable
    # of prefixes keeps working as it did with the previous explicit loop.
    extra_prefixes = tuple(drop_heading_prefixes)

    def should_drop_heading(title: str) -> bool:
        if title in drop_headings:
            return True
        if title.startswith(extra_prefixes):
            return True
        return remove_story_sections and title.startswith(story_prefixes)

    out: list[str] = []
    in_code = False
    # Level of the heading whose section is currently being dropped, or None
    # when lines are being kept.
    drop_level: int | None = None

    for raw in content.splitlines(True):
        line = raw.rstrip("\n")

        if CODE_FENCE_RE.match(line):
            # Track fences even inside dropped sections so that "#" lines in
            # dropped code blocks are not mistaken for headings.
            in_code = not in_code
            if drop_level is None:
                out.append(raw)
            continue

        heading = None if in_code else HEADING_RE.match(line)
        if heading is not None:
            level = len(heading.group(1))
            title = heading.group(2).strip()

            # A heading at the same or a shallower level ends the dropped section.
            if drop_level is not None and level <= drop_level:
                drop_level = None

            # Only headings outside a dropped section may start a new drop or
            # be renamed. A nested heading must neither narrow the drop level
            # (which would end the outer section too early) nor be emitted.
            if drop_level is None:
                if should_drop_heading(title):
                    drop_level = level
                    continue

                renamed = _rename_scaffold_heading(title)
                if renamed != title:
                    # Keep the line's own "\n", "\r\n" or "\r" ending; fall
                    # back to "\n" when it has none of these.
                    eol = raw[len(raw.rstrip("\r\n")):] or "\n"
                    out.append(f"{'#' * level} {renamed}{eol}")
                    continue

        if drop_level is None:
            out.append(raw)

    return "".join(out)


def write_edition(
    *,
    src: Path,
    dst: Path,
    title: str,
    quote: str,
    intro_lines: list[str],
    body: str,
) -> None:
    """Write one edition file: a generated header followed by *body*.

    The header is an H1 with *title*, a blank line, *quote* as a blockquote,
    a blank line, each entry of *intro_lines* on its own line, and finally a
    ``---`` rule surrounded by blank lines. The result is written to *dst*
    as UTF-8.

    *src* is accepted for interface compatibility but is not read here.
    """
    intro_block = "".join(entry + "\n" for entry in intro_lines)
    preamble = f"# {title}\n\n> {quote}\n\n" + intro_block + "\n---\n\n"
    dst.write_text(preamble + body, encoding="utf-8")


def main() -> int:
    """Build the submission and data-driven editions of the dossier.

    Reads ``DANNY_STOCKER_INFRAFABRIC_DOSSIER.md`` from the directory one
    level above this script's directory, rewrites it with
    ``rewrite_markdown`` and writes two edition files next to the input.

    Returns:
        ``0`` on success.

    Raises:
        SystemExit: if the input dossier does not exist.
    """
    root = Path(__file__).resolve().parents[1]
    dossier = root / "DANNY_STOCKER_INFRAFABRIC_DOSSIER.md"
    if not dossier.exists():
        raise SystemExit(f"missing input: {dossier}")

    source_text = dossier.read_text(encoding="utf-8")

    # Self-validation theater / external reviews embedded in the dossier.
    always_dropped = {
        "**External Evaluation & Velocity Calibration**",
        "**Independent Technical Risk Review (Excerpt)**",
        "Evidence Artifact: External Review Excerpt (Claude Sonnet 4.5) — “Black Mirror or inevitability?”",
        "P.S. (Post Scriptum)",
    }

    submission_prefixes = (
        "ANNEX (Non-Technical, Satire): The Dave Factor",
        # "Macro evaluator" sections from IF.emotion; kept only in the uncut edition.
        "12. Superintelligence Perspective:",
        "12. Superintelligence Perspective",
        # Business-case marketing sections do not belong in clean submissions.
        "Part 7: The Business Case",
    )

    # NOTE(review): both calls below pass identical arguments, so the two
    # edition bodies are currently identical; confirm this is intended.
    submission_body = rewrite_markdown(
        source_text,
        drop_headings=always_dropped,
        drop_heading_prefixes=submission_prefixes,
        remove_story_sections=True,
    )
    data_driven_body = rewrite_markdown(
        source_text,
        drop_headings=always_dropped,
        drop_heading_prefixes=submission_prefixes,
        remove_story_sections=True,
    )

    quote = (
        "The model’s answer is ephemeral. The trace is the product. If you can’t prove what happened, "
        "you are not running an AI system — you are running a scripted reality show."
    )

    # (output file name, title, intro lines, body) for each edition, in write order.
    editions = [
        (
            "DANNY_STOCKER_INFRAFABRIC_DOSSIER_SUBMISSION_EDITION_FULL.md",
            "InfraFabric Dossier — Submission Edition (Clean, Full) v2.0",
            [
                "**Edition:** Clean submission (full content, theater removed)",
                "**Scope:** Microlab; claims are scoped to verifiable artifacts and explicit boundaries.",
                "**Note:** This edition removes self-validation sections and scaffolding headings but keeps the author’s voice.",
            ],
            submission_body,
        ),
        (
            "DANNY_STOCKER_INFRAFABRIC_DOSSIER_DATA_DRIVEN_EDITION_FULL.md",
            "InfraFabric Dossier — Data‑Driven Edition (Full) v2.0",
            [
                "**Edition:** Data-driven (full content, story sections removed)",
                "**Scope:** Microlab; metrics are treated as self-reported unless independently attested.",
                "**Note:** This edition removes story/origin sections to keep the document “boring on purpose”.",
            ],
            data_driven_body,
        ),
    ]

    # Write every edition first, then report, so nothing is announced
    # unless all writes succeeded.
    for file_name, edition_title, intro, edition_body in editions:
        write_edition(
            src=dossier,
            dst=root / file_name,
            title=edition_title,
            quote=quote,
            intro_lines=intro,
            body=edition_body,
        )

    for file_name, _title, _intro, _body in editions:
        print(f"wrote: {file_name}")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())