Generate full clean and data-driven dossier editions

This commit is contained in:
root 2025-12-22 02:20:21 +00:00
parent 8c1c448f28
commit f3fc5a58b2
3 changed files with 55179 additions and 0 deletions

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,208 @@
#!/usr/bin/env python3
from __future__ import annotations
import re
from dataclasses import dataclass
from pathlib import Path
@dataclass(frozen=True)
class Rule:
    """Immutable record describing one heading-rewrite rule.

    NOTE(review): nothing in the visible part of this file constructs or
    reads ``Rule``; confirm whether it is still needed.
    """

    # Label for the rule (presumably for humans/logging -- confirm with callers).
    name: str
    # Compiled pattern used to select what the rule applies to
    # (presumably heading text -- confirm with callers).
    match: re.Pattern[str]
    # Action selector; the two documented values are listed inline.
    mode: str  # "drop_section" | "rename_heading"
    # Optional replacement text; None when not supplied.
    replacement: str | None = None
# Markdown "#"-style heading: group 1 is the run of 1-6 "#" marks (its length
# is the heading level); group 2 is the rest of the line after the whitespace.
HEADING_RE = re.compile(r"^(#{1,6})\s+(.*)$")
# A line that starts with three backticks, i.e. a code-fence delimiter.
# Only backtick fences beginning at column 0 are recognised by this pattern.
CODE_FENCE_RE = re.compile(r"^```")
def _heading_level(line: str) -> int | None:
    """Return the heading depth of *line* (number of leading ``#`` marks).

    Returns ``None`` when *line* does not match ``HEADING_RE``.
    """
    found = HEADING_RE.match(line)
    return len(found.group(1)) if found else None
def _is_heading(line: str) -> bool:
    """Tell whether *line* matches ``HEADING_RE`` (a ``#``-style heading)."""
    # A successful match object is always truthy; a failed match is None.
    return bool(HEADING_RE.match(line))
def _rename_scaffold_heading(text: str) -> str:
# Convert "HOOK: The Question" -> "The Question"
# This preserves the author's voice but removes visible scaffolding labels.
scaffold = ("HOOK:", "FLAW:", "SETUP:", "TENSION:", "TWIST:", "PUNCH:")
for token in scaffold:
if text.strip().upper().startswith(token):
return text.split(":", 1)[1].strip()
return text
def rewrite_markdown(
    content: str,
    *,
    drop_headings: set[str],
    drop_heading_prefixes: tuple[str, ...] = (),
    remove_story_sections: bool = False,
) -> str:
    """Return *content* with selected sections removed and scaffold labels stripped.

    A "section" is a heading line plus every following line up to (not
    including) the next heading whose level is the same or higher (fewer or
    equal ``#`` marks). Lines between backtick code fences are never
    interpreted as headings.

    Args:
        content: Markdown text to rewrite.
        drop_headings: Exact heading titles (text after the ``#`` marks,
            stripped) whose sections are removed.
        drop_heading_prefixes: Title prefixes whose sections are removed.
        remove_story_sections: When true, also remove sections whose title
            starts with one of the built-in story/origin prefixes.

    Returns:
        The rewritten Markdown. Kept lines are emitted unchanged, except
        headings carrying a scaffold label (see ``_rename_scaffold_heading``),
        which are re-emitted as ``"<#marks> <new title>\\n"``.
    """
    story_prefixes = (
        "IF.STORY | The Origin Story",
        "The Origin Story —",
        "Story ",
        "Cold Open —",
    )
    prefixes = tuple(drop_heading_prefixes)

    def should_drop_heading(title: str) -> bool:
        """Decide whether the section opened by *title* must be removed."""
        if title in drop_headings or title.startswith(prefixes):
            return True
        return remove_story_sections and title.startswith(story_prefixes)

    out: list[str] = []
    in_code = False
    # Level of the heading that opened the section currently being dropped;
    # None while lines are being kept.
    drop_level: int | None = None

    for raw in content.splitlines(True):
        line = raw.rstrip("\n")

        if CODE_FENCE_RE.match(line):
            # Fence state is tracked even inside dropped sections so that the
            # closing fence is paired correctly.
            in_code = not in_code
            if drop_level is None:
                out.append(raw)
            continue

        heading = None if in_code else HEADING_RE.match(line)
        if heading is not None:
            level = len(heading.group(1))
            title = heading.group(2).strip()

            if drop_level is not None:
                if level > drop_level:
                    # Sub-heading of a dropped section: it is dropped with its
                    # parent and must neither be renamed/emitted nor replace
                    # the level at which the drop ends.
                    continue
                # A heading at the same/higher level ends the dropped section.
                drop_level = None

            if should_drop_heading(title):
                drop_level = level
                continue

            renamed = _rename_scaffold_heading(title)
            if renamed != title:
                out.append(f"{'#' * level} {renamed}\n")
                continue

        if drop_level is None:
            out.append(raw)

    return "".join(out)
def write_edition(
    *,
    src: Path,
    dst: Path,
    title: str,
    quote: str,
    intro_lines: list[str],
    body: str,
) -> None:
    """Write one edition file: a generated front matter followed by *body*.

    The front matter is an H1 with *title*, a block quote with *quote*, one
    line per entry of *intro_lines*, and a ``---`` rule. The result is
    written to *dst* as UTF-8. *src* is accepted but not used here.
    """
    intro_block = "".join(entry + "\n" for entry in intro_lines)
    document = f"# {title}\n\n> {quote}\n\n{intro_block}\n---\n\n{body}"
    dst.write_text(document, encoding="utf-8")
def main() -> int:
    """Generate the clean and the data-driven dossier editions.

    Reads ``DANNY_STOCKER_INFRAFABRIC_DOSSIER.md`` from the parent of the
    directory that holds this script, rewrites it twice with
    ``rewrite_markdown`` (once keeping and once removing story sections) and
    writes both results next to the input via ``write_edition``.

    Returns:
        ``0`` on success.

    Raises:
        SystemExit: If the input dossier does not exist.
    """
    repo_root = Path(__file__).resolve().parents[1]
    src = repo_root / "DANNY_STOCKER_INFRAFABRIC_DOSSIER.md"
    if not src.exists():
        raise SystemExit(f"missing input: {src}")
    original = src.read_text(encoding="utf-8")

    drop_common = {
        # self-validation theater / external reviews embedded in the dossier
        "**External Evaluation & Velocity Calibration**",
        "**Independent Technical Risk Review (Excerpt)**",
        "Evidence Artifact: External Review Excerpt (Claude Sonnet 4.5) — “Black Mirror or inevitability?”",
        "P.S. (Post Scriptum)",
    }
    # Both editions drop the same annex; they differ only in story handling.
    annex_prefixes = ("ANNEX (Non-Technical, Satire): The Dave Factor",)

    clean_body = rewrite_markdown(
        original,
        drop_headings=drop_common,
        drop_heading_prefixes=annex_prefixes,
        remove_story_sections=False,
    )
    data_body = rewrite_markdown(
        original,
        drop_headings=drop_common,
        drop_heading_prefixes=annex_prefixes,
        remove_story_sections=True,
    )

    quote = (
        "The model's answer is ephemeral. The trace is the product. If you can't prove what happened, "
        "you are not running an AI system — you are running a scripted reality show."
    )
    clean_intro = [
        "**Edition:** Clean submission (full content, theater removed)",
        "**Scope:** Microlab; claims are scoped to verifiable artifacts and explicit boundaries.",
        "**Note:** This edition removes self-validation sections and scaffolding headings but keeps the author's voice.",
    ]
    data_intro = [
        "**Edition:** Data-driven (full content, story sections removed)",
        "**Scope:** Microlab; metrics are treated as self-reported unless independently attested.",
        "**Note:** This edition removes story/origin sections to keep the document “boring on purpose”.",
    ]

    # Name each output once so the file written and the message printed
    # cannot drift apart.
    clean_dst = repo_root / "DANNY_STOCKER_INFRAFABRIC_DOSSIER_SUBMISSION_EDITION_FULL.md"
    data_dst = repo_root / "DANNY_STOCKER_INFRAFABRIC_DOSSIER_DATA_DRIVEN_EDITION_FULL.md"

    write_edition(
        src=src,
        dst=clean_dst,
        title="InfraFabric Dossier — Submission Edition (Clean, Full) v2.0",
        quote=quote,
        intro_lines=clean_intro,
        body=clean_body,
    )
    write_edition(
        src=src,
        dst=data_dst,
        title="InfraFabric Dossier — Data-Driven Edition (Full) v2.0",
        quote=quote,
        intro_lines=data_intro,
        body=data_body,
    )
    print(f"wrote: {clean_dst.name}")
    print(f"wrote: {data_dst.name}")
    return 0
# Run the generator only when executed as a script; the value returned by
# main() is handed to SystemExit and so becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())