Infrafabric-POC-docs/tools/build_dossier_editions.py

216 lines
6.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
from __future__ import annotations
import re
from dataclasses import dataclass
from pathlib import Path
@dataclass(frozen=True)
class Rule:
name: str
match: re.Pattern[str]
mode: str # "drop_section" | "rename_heading"
replacement: str | None = None
# Markdown heading line: group 1 is the run of one to six "#" characters,
# group 2 is the rest of the line after the separating whitespace.
HEADING_RE = re.compile(r"^(#{1,6})\s+(.*)$")
# A line that begins with three backticks; rewrite_markdown uses it to toggle
# its "inside a code block" state.
CODE_FENCE_RE = re.compile(r"^```")
def _heading_level(line: str) -> int | None:
    """Return the number of leading ``#`` characters of a heading line.

    Returns ``None`` when *line* does not match ``HEADING_RE``.
    """
    found = HEADING_RE.match(line)
    return len(found.group(1)) if found else None
def _is_heading(line: str) -> bool:
    """Report whether *line* matches ``HEADING_RE``."""
    # A match object is always truthy, so bool() is True exactly on a match.
    return bool(HEADING_RE.match(line))
def _rename_scaffold_heading(text: str) -> str:
# Convert "HOOK: The Question" -> "The Question"
# This preserves the author's voice but removes visible scaffolding labels.
scaffold = ("HOOK:", "FLAW:", "SETUP:", "TENSION:", "TWIST:", "PUNCH:")
for token in scaffold:
if text.strip().upper().startswith(token):
return text.split(":", 1)[1].strip()
# Remove other scaffold-y heading labels that read like writing instructions.
if text.strip().lower().startswith("the punchline for your ciso"):
# keep intent but remove the scaffold label
return "Executive takeaway (security leadership)"
return text
def rewrite_markdown(
    content: str,
    *,
    drop_headings: set[str],
    drop_heading_prefixes: tuple[str, ...] = (),
    remove_story_sections: bool = False,
) -> str:
    """Return *content* with selected sections removed and scaffold headings renamed.

    The text is processed line by line.  A heading whose title is listed in
    *drop_headings*, or starts with one of the configured prefixes, is removed
    together with every following line up to (not including) the next heading
    of the same or a shallower level.  Headings that survive are passed
    through ``_rename_scaffold_heading``.  Lines between lines that start with
    three backticks are never interpreted as headings.

    Args:
        content: Full Markdown text.
        drop_headings: Heading titles (stripped, without ``#`` markers) whose
            sections are removed on exact match.
        drop_heading_prefixes: Title prefixes whose sections are removed.
        remove_story_sections: When true, additionally remove the built-in
            story/origin heading prefixes listed in the body.

    Returns:
        The rewritten Markdown text.
    """
    story_prefixes = (
        "IF.STORY | The Origin Story",
        "The Origin Story —",
        "Story ",
        "Cold Open —",
    )
    # Fold every active prefix into one tuple so a single startswith() call
    # covers them all; an empty tuple simply never matches.
    prefixes = tuple(drop_heading_prefixes)
    if remove_story_sections:
        prefixes += story_prefixes

    def should_drop_heading(title: str) -> bool:
        return title in drop_headings or title.startswith(prefixes)

    out: list[str] = []
    in_code = False
    # Level of the heading whose section is currently being dropped, or None
    # when lines are being kept.
    drop_level: int | None = None

    for raw in content.splitlines(True):
        line = raw.rstrip("\n")

        if CODE_FENCE_RE.match(line):
            # Fences toggle code state even inside a dropped section so that
            # "#" lines in dropped code are not mistaken for headings.
            in_code = not in_code
            if drop_level is None:
                out.append(raw)
            continue

        heading = None if in_code else HEADING_RE.match(line)
        if heading is not None:
            level = len(heading.group(1))
            title = heading.group(2).strip()

            # End a dropped section when we hit a heading at same/higher level.
            if drop_level is not None and level <= drop_level:
                drop_level = None

            # Only headings outside a dropped section may start a new drop or
            # be renamed.  A deeper heading inside a dropped section must not
            # reset the drop level (that would end the drop too early) and
            # must not be emitted in renamed form.
            if drop_level is None:
                if should_drop_heading(title):
                    drop_level = level
                    continue
                renamed = _rename_scaffold_heading(title)
                if renamed != title:
                    # Reuse the line's own CR/LF terminator; fall back to
                    # "\n" when the line has none.
                    eol = raw[len(raw.rstrip("\r\n")):] or "\n"
                    out.append(f"{'#' * level} {renamed}{eol}")
                    continue

        if drop_level is None:
            out.append(raw)

    return "".join(out)
def write_edition(
    *,
    src: Path,
    dst: Path,
    title: str,
    quote: str,
    intro_lines: list[str],
    body: str,
) -> None:
    """Write one edition file: a generated header followed by *body*.

    The header is a level-1 heading with *title*, a block quote with *quote*,
    each entry of *intro_lines* on its own line, and a ``---`` rule.

    Args:
        src: Accepted for the caller's benefit; not used in this function.
        dst: File to write (UTF-8).
        title: Text of the top-level heading.
        quote: Text of the block quote under the heading.
        intro_lines: Lines placed between the quote and the rule.
        body: Markdown appended after the header.
    """
    intro_block = "".join(f"{entry}\n" for entry in intro_lines)
    preamble = f"# {title}\n\n> {quote}\n\n{intro_block}\n---\n\n"
    dst.write_text(preamble + body, encoding="utf-8")
def main() -> int:
    """Build the submission and data-driven editions of the dossier.

    Reads ``DANNY_STOCKER_INFRAFABRIC_DOSSIER.md`` from the directory one
    level above this script's directory, rewrites it with
    ``rewrite_markdown`` and writes two edition files beside it with
    ``write_edition``.

    Returns:
        0 on success.

    Raises:
        SystemExit: With a "missing input" message when the source dossier
            does not exist.
    """
    repo_root = Path(__file__).resolve().parents[1]
    src = repo_root / "DANNY_STOCKER_INFRAFABRIC_DOSSIER.md"
    if not src.exists():
        raise SystemExit(f"missing input: {src}")
    original = src.read_text(encoding="utf-8")

    drop_common = {
        # self-validation theater / external reviews embedded in the dossier
        "**External Evaluation & Velocity Calibration**",
        "**Independent Technical Risk Review (Excerpt)**",
        "Evidence Artifact: External Review Excerpt (Claude Sonnet 4.5) — “Black Mirror or inevitability?”",
        "P.S. (Post Scriptum)",
    }
    drop_prefixes_submission = (
        "ANNEX (Non-Technical, Satire): The Dave Factor",
        # remove “macro evaluator” sections from IF.emotion; keep them only in uncut
        "12. Superintelligence Perspective:",
        "12. Superintelligence Perspective",
        # remove business-case marketing sections from clean submissions
        "Part 7: The Business Case",
    )

    # NOTE(review): both editions are built with identical arguments, so their
    # bodies are the same text and only the generated header differs.  The
    # intro lines below suggest the clean edition may have been meant to keep
    # story sections (remove_story_sections=False) -- confirm before changing.
    clean_body = rewrite_markdown(
        original,
        drop_headings=drop_common,
        drop_heading_prefixes=drop_prefixes_submission,
        remove_story_sections=True,
    )
    data_body = rewrite_markdown(
        original,
        drop_headings=drop_common,
        drop_heading_prefixes=drop_prefixes_submission,
        remove_story_sections=True,
    )

    # The apostrophes and the hyphen in "Data-Driven" below were missing from
    # the emitted text ("models", "cant", "authors", "DataDriven").
    quote = (
        "The model's answer is ephemeral. The trace is the product. If you can't prove what happened, "
        "you are not running an AI system — you are running a scripted reality show."
    )
    clean_intro = [
        "**Edition:** Clean submission (full content, theater removed)",
        "**Scope:** Microlab; claims are scoped to verifiable artifacts and explicit boundaries.",
        "**Note:** This edition removes self-validation sections and scaffolding headings but keeps the author's voice.",
    ]
    data_intro = [
        "**Edition:** Data-driven (full content, story sections removed)",
        "**Scope:** Microlab; metrics are treated as self-reported unless independently attested.",
        "**Note:** This edition removes story/origin sections to keep the document “boring on purpose”.",
    ]

    editions = (
        (
            "DANNY_STOCKER_INFRAFABRIC_DOSSIER_SUBMISSION_EDITION_FULL.md",
            "InfraFabric Dossier — Submission Edition (Clean, Full) v2.0",
            clean_intro,
            clean_body,
        ),
        (
            "DANNY_STOCKER_INFRAFABRIC_DOSSIER_DATA_DRIVEN_EDITION_FULL.md",
            "InfraFabric Dossier — Data-Driven Edition (Full) v2.0",
            data_intro,
            data_body,
        ),
    )
    for filename, title, intro_lines, body in editions:
        write_edition(
            src=src,
            dst=repo_root / filename,
            title=title,
            quote=quote,
            intro_lines=intro_lines,
            body=body,
        )
        # Report the same name that was just written so the message cannot
        # drift from the actual output file.
        print(f"wrote: {filename}")
    return 0
if __name__ == "__main__":
    # Run only when executed as a script; main()'s return value becomes the
    # process exit status.
    exit_code = main()
    raise SystemExit(exit_code)