#!/usr/bin/env python3 from __future__ import annotations import argparse import csv import datetime as _dt import hashlib import json import os import re import subprocess import textwrap import uuid from collections import deque from dataclasses import dataclass from pathlib import Path from urllib.parse import urlparse @dataclass(frozen=True) class DayConfig: day: str edition: str hashtag: str source_url: str def _repo_root() -> Path: return Path(__file__).resolve().parents[2] def _run(cmd: list[str], *, cwd: Path | None = None, env: dict[str, str] | None = None) -> subprocess.CompletedProcess[str]: return subprocess.run( cmd, cwd=str(cwd) if cwd else None, env=env, check=True, capture_output=True, text=True, ) def _sha256_file(path: Path) -> str: h = hashlib.sha256() with path.open("rb") as f: for chunk in iter(lambda: f.read(1024 * 1024), b""): h.update(chunk) return h.hexdigest() def _sha256_text(text: str) -> str: return hashlib.sha256(text.encode("utf-8", errors="replace")).hexdigest() def _write_sha256_sidecar(path: Path) -> None: path.with_suffix(path.suffix + ".sha256").write_text(_sha256_file(path) + "\n", encoding="utf-8") def _utc_now() -> str: return _dt.datetime.now(tz=_dt.timezone.utc).replace(microsecond=0).isoformat().replace("+00:00", "Z") def _download(url: str, dest: Path) -> None: dest.parent.mkdir(parents=True, exist_ok=True) _run( [ "curl", "-fsSL", "-L", "--retry", "3", "--retry-delay", "1", "-o", str(dest), url, ] ) def _guess_ext(url: str, default: str = ".pdf") -> str: try: path = urlparse(url).path or "" except Exception: path = "" ext = Path(path).suffix.lower() if ext in {".pdf", ".md", ".txt"}: return ext return default def _revoice_env() -> dict[str, str]: repo_root = _repo_root() env = dict(os.environ) env["PYTHONPATH"] = str(repo_root / "src") return env def _revoice_extract(*, input_path: Path, output_txt: Path) -> None: repo_root = _repo_root() _run( ["python3", "-m", "revoice", "extract", "--input", str(input_path), "--output", str(output_txt)], cwd=repo_root, env=_revoice_env(), ) def _revoice_generate(*, style: str, input_path: Path, output_md: Path, env: dict[str, str] | None = None) -> None: repo_root = _repo_root() _run( ["python3", "-m", "revoice", "generate", "--style", style, "--input", str(input_path), "--output", str(output_md)], cwd=repo_root, env=env or _revoice_env(), ) def _is_style_v2_6(style: str) -> bool: return "v2.6" in (style or "").lower() _INTRO_ASIDE_BANK_V2_6: dict[str, str] = { "A": "Most dossiers read as if physics is optional: eternal ROI, frictionless execution, boundless optimism. We strip away the gloss to reveal the operational reality.", "B": "Vendors sell capabilities; organizations inherit obligations. We read this document for the handoff moments where the promise turns into work.", "C": "In a market of promises, evidence is the only durable currency. We skip the adjectives and follow claims back to receipts and gates.", "D": "Every control deck assumes perfect execution. We read for the gap between the gate and the bypass.", "E": "Standards describe the happy path; reality is the edge case. We map where this specification hits the friction of existing infrastructure.", "F": "Tooling is cheap; ownership is the tax. We track the owner, the gate, and the expiry.", "G": "The source makes claims. We check whether those claims can be bound to receipts and stop conditions.", } def _classify_doc_for_intro_aside(*, normalized_text: str, source_basename: str) -> str: t = (normalized_text or "").lower() b = (source_basename or "").lower() if any(k in t for k in ["nist sp", "special publication", "rfc ", "iso 27", "iso/iec"]): return "standard" if any(k in b for k in ["nist", "rfc", "iso"]): return "standard" if any(k in t for k in ["guardrails", "policy", "requirements", "access request", "pull request", "pr checks", "why it matters"]): return "controls" if any(k in t for k in ["datasheet", "data sheet", "platform overview", "capabilities", "integrations", "modules", "features"]): return "capabilities" if any(k in t for k in ["strategy", "vision", "roadmap", "perspective", "state of", "framework"]): return "strategy" if any(k in t for k in ["white paper", "whitepaper", "solution brief", "ebook"]): return "marketing" return "unknown" def _select_intro_aside_v2_6( *, day_key: str, normalized_text: str, source_basename: str, used_window: deque[str], ) -> str: kind = _classify_doc_for_intro_aside(normalized_text=normalized_text, source_basename=source_basename) candidates_by_kind = { "standard": ["E", "G"], "controls": ["D", "F", "G"], "capabilities": ["B", "F", "G"], "strategy": ["C", "G", "F"], "marketing": ["A", "C", "G"], "unknown": ["G", "C", "F"], } keys = candidates_by_kind.get(kind) or candidates_by_kind["unknown"] variants = [_INTRO_ASIDE_BANK_V2_6[k] for k in keys if k in _INTRO_ASIDE_BANK_V2_6] if not variants: variants = list(_INTRO_ASIDE_BANK_V2_6.values()) digest = hashlib.sha256(f"{day_key}:intro_aside:v2.6:{kind}:{source_basename}".encode("utf-8", errors="replace")).digest() start = int.from_bytes(digest[:2], "big") % len(variants) for offset in range(len(variants)): candidate = variants[(start + offset) % len(variants)].strip() if not candidate: continue # v2.6: reduce template smell across daily drops (5-dossier rolling window). if candidate in used_window: continue used_window.append(candidate) return candidate # Fallback: allow reuse if we’re out of options (rare with 7-line bank + 5-window). candidate = variants[start].strip() if candidate: used_window.append(candidate) return candidate def _revoice_preflight(*, style: str, md_path: Path, source_path: Path) -> str: repo_root = _repo_root() proc = subprocess.run( ["python3", "-m", "revoice", "preflight", "--style", style, "--input", str(md_path), "--source", str(source_path)], cwd=str(repo_root), env=_revoice_env(), capture_output=True, text=True, ) if proc.returncode == 0: return "" if proc.returncode == 2: return (proc.stderr or proc.stdout or "").strip() raise RuntimeError(f"revoice preflight failed (code {proc.returncode}): {(proc.stderr or proc.stdout or '').strip()}") def _extract_first_claim(md: str) -> str: claims: list[str] = [] for line in md.splitlines(): m = re.match(r"^- The source claims: [“\"](?P.+?)[”\"]\s*$", line.strip()) if m: claims.append(m.group("q").strip()) if not claims: return "" def is_low_signal(claim: str) -> bool: c = (claim or "").strip() lower = c.lower() if "datasheet" in lower: return True if "all rights reserved" in lower or "copyright" in lower: return True # Very short fragments tend to be headers/footers or OCR junk. if len(c) < 40: return True return False # Prefer the first non-noise claim; fall back to the first claim if all are noisy. chosen = next((c for c in claims if not is_low_signal(c)), claims[0]) if len(chosen) > 160: return chosen[:157].rstrip() + "…" return chosen def _extract_first_dave_factor(md: str) -> str: for line in md.splitlines(): m = re.match(r"^>\s*\*\*The Dave Factor:\*\*\s*(?P.+?)\s*$", line.strip()) if m: text = m.group("t").strip() if len(text) > 180: return text[:177].rstrip() + "…" return text return "" def _sponsor_bumper(day_key: str) -> str: variants = [ "This episode brought to you by the exception half-life: temporary becomes permanent without automated expiry.", "Underwritten by the laws of incentives: dashboards observe, gates enforce. See verifiable traces at https://infrafabric.io", "Sponsored by operational realism: the roadmap is not the territory.", "A message from the gating problem: visibility without stop conditions is theater.", "This critique made possible by InfraFabric Red Team — publishing the gates your org must own. https://infrafabric.io", ] digest = hashlib.sha256(day_key.encode("utf-8", errors="replace")).digest() return variants[int.from_bytes(digest[:2], "big") % len(variants)] def _write_marketing( *, out_path: Path, day: DayConfig, next_day: DayConfig | None, base_url: str, source_url: str, dossier_md: str, stamp_square_url: str, hero_url: str, ) -> None: day_upper = day.day.upper() next_label = f"{next_day.day.upper()} — {next_day.edition} {next_day.hashtag}" if next_day else "Next week: new drops." dave_factor = _extract_first_dave_factor(dossier_md) or "The control drifts into a status update, and the status update becomes the control." claim = _extract_first_claim(dossier_md) quote_bullet = f"- The source claims: “{claim}”" if claim else "- (Add one short measurable source quote from the dossier’s Claims Register.)" lines = [ f"# Thread Pack — {day_upper} ({day.edition} Edition)", "", f"- Hashtag: {day.hashtag}", "- Schedule: 6:45 AM EST main drop (promo 6:00 AM; next-on 8:00 PM prior)", "", "## Post 0 — Next On (previous evening, 8:00 PM EST)", "", f"Tomorrow: {next_label}", "", "## Post 1 — Pre-Show Promo (6:00 AM EST)", "", f"{day.hashtag} EYES ONLY // DAVE", "", dave_factor, "", f"Stamp: {stamp_square_url}", f"Hero: {hero_url}", "", "## Post 2 — Main Thread (6:45 AM EST)", "", f"Shadow Dossier — {day.edition} Edition {day.hashtag}", "", f"Source: {source_url}", f"Full pack: {base_url}/{day.day}.pack.md", "", "## Post 3 — The Source Claims (quote-budget)", "", quote_bullet, "", "## Post 4 — Sponsor Bumper (mid-thread)", "", _sponsor_bumper(day.day), "", "## Post 5 — The Gate (Action Pack tease)", "", "Gate: Governance", 'Stop condition: No "rolled out" without an owner, a gate, and an expiry.', "", "## Post 6 — Trace + Download", "", f"Trace: {base_url}/{day.day}.trace.json", f"Shadow (md): {base_url}/{day.day}.shadow.md", "", "## Post 7 — Next Day Tease (end of thread)", "", f"Tomorrow 6:45 AM: {next_label}" if next_day else "Next: new drops.", "", ] out_path.write_text("\n".join(lines).strip() + "\n", encoding="utf-8") def _write_pack( *, out_path: Path, day: DayConfig, next_day: DayConfig | None, base_url: str, source_url: str, shadow_url: str, trace_url: str, marketing_url: str, trace_json: dict, marketing_md: str, shadow_md: str, ) -> None: next_link = f"{base_url}/{next_day.day}.pack.md" if next_day else "" lines = [ f"# InfraFabric External Review Pack — {day.day.upper()} ({day.edition} Edition)", "", "This is a single-file bundle intended for review environments that cannot reliably fetch multiple URLs.", "", "## Links", "", f"- Source: {source_url}", "", f"- Shadow dossier (download Markdown): {shadow_url}", "", f"- Trace (JSON): {trace_url}", "", f"- Marketing thread pack: {marketing_url}", "", f"- Pack (this file): {base_url}/{day.day}.pack.md", ] if next_link: lines.extend(["", f"- Coming next: {next_link}", ""]) else: lines.append("") lines.extend( [ "", "## Review instructions (portable)", "", "Hard rules:", "", "1) 100% factual: tag every non-trivial claim as [SOURCE]/[DOSSIER]/[TRACE]/[INFERENCE].", "", "2) Vendor-neutral: critique deployment conditions + org behaviors, not vendor intent/competence.", "", "3) Mirror discipline: follow the dossier’s section order; do not invent a new outline.", "", "", "Deliverables:", "", "A) 5–10 bullets: what works / what doesn’t (tag each)", "", "B) Scorecard (0–5): mirror integrity, layout fidelity, humor discipline, mermaid value, trace/demo value, CTA stealth", "", "C) Section-by-section critique (mirror headings): what’s mirrored, what’s missing, what feels templated/repeated", "", "D) Vendor-safe conclusion rewrite: success conditions / traps / questions-to-ask-vendor", "", "E) Patch suggestions (actionable): unified diffs preferred against bible + generator", "", "", "## Trace", "", "```json", "", json.dumps(trace_json, indent=2, sort_keys=False), "", "```", "", "", "## Marketing thread pack", "", "```markdown", "", marketing_md.strip(), "", "```", "", "", "## Shadow dossier (Markdown)", "", "```markdown", "", shadow_md.strip(), "", "```", "", ] ) out_path.write_text("\n".join(lines).strip() + "\n", encoding="utf-8") def _write_week_index(*, out_path: Path, week_title: str, base_url: str, days: list[DayConfig], source_links: dict[str, str]) -> None: lines = [ f"# InfraFabric External Review Pack — Week ({week_title})", "", f"This is the week bundle for **IF.DAVE.BIBLE {week_title}**. Review one day at a time.", "", f"Base: {base_url}/", "", "", "## Days", "", "| Day | Edition | Pack | Marketing | Shadow | Trace | Source |", "| --- | --- | --- | --- | --- | --- | --- |", ] for d in days: day_upper = d.day.upper() lines.append( "| " + " | ".join( [ day_upper, d.edition, f"{base_url}/{d.day}.pack.md", f"{base_url}/{d.day}.marketing.md", f"{base_url}/{d.day}.shadow.md", f"{base_url}/{d.day}.trace.json", source_links.get(d.day, d.source_url), ] ) + " |" ) lines.extend( [ "", "## Full offline week bundle", "", f"- Full week single-file pack: {base_url}/week.pack.md", "", ] ) out_path.write_text("\n".join(lines).strip() + "\n", encoding="utf-8") def _render_recap_source(*, base_url: str, days: list[DayConfig], highlight_by_day: dict[str, str]) -> str: rows = [] for d in days: if d.day in {"sat", "sun"}: continue rows.append(f"| {d.day.title()} | {d.edition} | {base_url}/{d.day}.pack.md | {base_url}/{d.day}.trace.json |") highlights = [] for d in days: if d.day in {"sat", "sun"}: continue sting = highlight_by_day.get(d.day, "") if sting: highlights.append(f"- **{d.edition}:** {sting}") if not highlights: highlights.append("- (highlights unavailable)") return "\n".join( [ "# Shadow Dossier — Weekly Recap Edition", "", "This recap aggregates the week’s drops (Mon–Fri) into one “what mattered / what broke / what to steal for Monday” artifact.", "", "## Week lineup (links)", "", "| Day | Edition | Pack | Trace |", "| --- | --- | --- | --- |", *rows, "", "## Highlights (one-line stings)", "", *highlights, "", "## What to steal (portable)", "", "1. Replace manual evidence with machine-verifiable signals (event type + emitter + freshness window).", "2. Treat exceptions as architecture unless auto-expiry is enforced.", "3. Never accept “rolled out” without opt-in/opt-out + stop conditions.", "", "## Poll (optional)", "", "Which failure mode hurts most in your org?", "- A) Evidence theater (screenshots/certs)", "- B) Exception creep", "- C) Dashboard storytelling", "- D) “Pilot” that never ends", "", ] ).strip() def _read_days_tsv(path: Path) -> list[DayConfig]: rows: list[DayConfig] = [] with path.open("r", encoding="utf-8") as f: reader = csv.DictReader(f, delimiter="\t") for row in reader: day = (row.get("day") or "").strip().lower() edition = (row.get("edition") or "").strip() hashtag = (row.get("hashtag") or "").strip() source_url = (row.get("source_url") or "").strip() if not day or not edition or not hashtag or not source_url: raise ValueError(f"Invalid row in {path}: {row}") rows.append(DayConfig(day=day, edition=edition, hashtag=hashtag, source_url=source_url)) return rows def main() -> int: p = argparse.ArgumentParser() p.add_argument("--days", required=True, help="TSV with columns: day, edition, hashtag, source_url") p.add_argument("--out", required=True, help="Output directory (build artifacts)") p.add_argument("--style", default="if.dave.v1.9", help="Dave style id (default: if.dave.v1.9)") p.add_argument("--base-url", required=True, help="Published base URL for week packs (no trailing slash)") p.add_argument("--source-prefix", default="https://infrafabric.io/static/source/", help="Where sources will be hosted") p.add_argument( "--note", default="static_week_v19", help="Trace note field (default: static_week_v19)", ) p.add_argument( "--stamp-square-url", default="https://infrafabric.io/static/hosted/review/assets/eyes-only/red-ream-600-600.png", help="Canonical square stamp image URL", ) p.add_argument( "--hero-url", default="https://infrafabric.io/static/hosted/review/assets/eyes-only/red-team-doc-1024-559.jpg", help="Canonical hero image URL", ) args = p.parse_args() out_dir = Path(args.out).resolve() sources_dir = out_dir / "sources" build_dir = out_dir / "build" build_dir.mkdir(parents=True, exist_ok=True) sources_dir.mkdir(parents=True, exist_ok=True) days = _read_days_tsv(Path(args.days)) by_day = {d.day: d for d in days} ordered = [by_day[k] for k in ["mon", "tue", "wed", "thu", "fri", "sat", "sun"] if k in by_day] if len(ordered) != 7: raise SystemExit("Expected 7 days (mon..sun) in TSV") source_links: dict[str, str] = {} highlight_by_day: dict[str, str] = {} used_intro_asides_v2_6: deque[str] = deque(maxlen=5) # First pass: download/generate sources (except recap), create shadow, trace, marketing, pack. for idx, day in enumerate(ordered): next_day = ordered[idx + 1] if idx + 1 < len(ordered) else None if day.source_url.upper() == "GENERATE": continue ext = _guess_ext(day.source_url, default=".pdf") src_path = sources_dir / f"{day.day}{ext}" _download(day.source_url, src_path) src_sha = _sha256_file(src_path) source_links[day.day] = f"{args.source_prefix}{src_sha}{ext}" # Keep extracted text for debugging (PDF only). if ext == ".pdf": _revoice_extract(input_path=src_path, output_txt=sources_dir / f"{day.day}.txt") shadow_path = build_dir / f"{day.day}.shadow.md" gen_env = _revoice_env() if _is_style_v2_6(args.style): normalized_text = "" txt_path = sources_dir / f"{day.day}.txt" if txt_path.exists(): normalized_text = txt_path.read_text(encoding="utf-8", errors="replace") aside = _select_intro_aside_v2_6( day_key=day.day, normalized_text=normalized_text, source_basename=src_path.name, used_window=used_intro_asides_v2_6, ) gen_env["REVOICE_EDITORIAL_ASIDE"] = aside _revoice_generate(style=args.style, input_path=src_path, output_md=shadow_path, env=gen_env) warnings = _revoice_preflight(style=args.style, md_path=shadow_path, source_path=src_path) out_sha = _sha256_file(shadow_path) trace = { "id": str(uuid.uuid4()), "status": "done", "createdAt": _utc_now(), "day": day.day, "edition": day.edition, "hashtag": day.hashtag, "style": args.style, "sourceSha256": src_sha, "outputSha256": out_sha, "warnings": warnings, "note": args.note, } trace_path = build_dir / f"{day.day}.trace.json" trace_path.write_text(json.dumps(trace, indent=2, sort_keys=False) + "\n", encoding="utf-8") shadow_md = shadow_path.read_text(encoding="utf-8", errors="replace") if day.day in {"mon", "tue", "wed", "thu", "fri"}: highlight_by_day[day.day] = _extract_first_dave_factor(shadow_md) marketing_path = build_dir / f"{day.day}.marketing.md" _write_marketing( out_path=marketing_path, day=day, next_day=next_day, base_url=args.base_url, source_url=source_links[day.day], dossier_md=shadow_md, stamp_square_url=args.stamp_square_url, hero_url=args.hero_url, ) pack_path = build_dir / f"{day.day}.pack.md" _write_pack( out_path=pack_path, day=day, next_day=next_day, base_url=args.base_url, source_url=source_links[day.day], shadow_url=f"{args.base_url}/{day.day}.shadow.md", trace_url=f"{args.base_url}/{day.day}.trace.json", marketing_url=f"{args.base_url}/{day.day}.marketing.md", trace_json=trace, marketing_md=marketing_path.read_text(encoding="utf-8"), shadow_md=shadow_md, ) # Build recap source for SAT, then run it through the same pipeline. recap = by_day.get("sat") if recap: recap_src = sources_dir / "sat.md" recap_src.write_text( _render_recap_source(base_url=args.base_url, days=ordered, highlight_by_day=highlight_by_day) + "\n", encoding="utf-8" ) recap_sha = _sha256_file(recap_src) source_links["sat"] = f"{args.source_prefix}{recap_sha}.md" shadow_path = build_dir / "sat.shadow.md" _revoice_generate(style=args.style, input_path=recap_src, output_md=shadow_path) warnings = _revoice_preflight(style=args.style, md_path=shadow_path, source_path=recap_src) out_sha = _sha256_file(shadow_path) trace = { "id": str(uuid.uuid4()), "status": "done", "createdAt": _utc_now(), "day": "sat", "edition": recap.edition, "hashtag": recap.hashtag, "style": args.style, "sourceSha256": recap_sha, "outputSha256": out_sha, "warnings": warnings, "note": args.note, } trace_path = build_dir / "sat.trace.json" trace_path.write_text(json.dumps(trace, indent=2, sort_keys=False) + "\n", encoding="utf-8") shadow_md = shadow_path.read_text(encoding="utf-8", errors="replace") marketing_path = build_dir / "sat.marketing.md" _write_marketing( out_path=marketing_path, day=recap, next_day=by_day.get("sun"), base_url=args.base_url, source_url=source_links["sat"], dossier_md=shadow_md, stamp_square_url=args.stamp_square_url, hero_url=args.hero_url, ) pack_path = build_dir / "sat.pack.md" _write_pack( out_path=pack_path, day=recap, next_day=by_day.get("sun"), base_url=args.base_url, source_url=source_links["sat"], shadow_url=f"{args.base_url}/sat.shadow.md", trace_url=f"{args.base_url}/sat.trace.json", marketing_url=f"{args.base_url}/sat.marketing.md", trace_json=trace, marketing_md=marketing_path.read_text(encoding="utf-8"), shadow_md=shadow_md, ) # Week index + full pack. m = re.search(r"(v\d+(?:\.\d+)*)", args.style) week_title = m.group(1) if m else args.style index_path = build_dir / "index.md" _write_week_index(out_path=index_path, week_title=week_title, base_url=args.base_url, days=ordered, source_links=source_links) week_pack_path = build_dir / "week.pack.md" body_parts = [ f"# InfraFabric External Review Pack — Full Week ({week_title})", "", "This file embeds all daily packs for sandboxed review environments. Review one day at a time.", "", f"Index: {args.base_url}/index.md", "", "---", "", ] for d in ordered: pack_file = build_dir / f"{d.day}.pack.md" if not pack_file.exists(): continue body_parts.append(f"## {d.day.upper()} ({d.edition} Edition)") body_parts.append("") body_parts.append(pack_file.read_text(encoding="utf-8", errors="replace").strip()) body_parts.append("") body_parts.append("---") body_parts.append("") week_pack_path.write_text("\n".join(body_parts).strip() + "\n", encoding="utf-8") # Hash sidecars for everything in build dir. for pth in sorted(build_dir.iterdir()): if pth.is_file() and not pth.name.endswith(".sha256"): _write_sha256_sidecar(pth) # Write resolved source manifest for publishing. manifest = out_dir / "source_manifest.json" manifest.write_text(json.dumps({"sources": source_links}, indent=2, sort_keys=True) + "\n", encoding="utf-8") return 0 if __name__ == "__main__": raise SystemExit(main())