#!/usr/bin/env python3 from __future__ import annotations import argparse import re from collections import Counter from pathlib import Path import yaml SCAFFOLD_HEADING_RE = re.compile(r"^(#{1,6})\s+(HOOK|FLAW|SETUP|TENSION|TWIST|PUNCH):\s+(.*)\s*$") LEGACY_BLOCK_RE = re.compile(r"\(legacy:[^)]+\)") def _shield_legacy_blocks(text: str) -> tuple[str, list[str]]: """ Prevent legacy-mapping annotations like `(legacy: ...)` from being rewritten by the refactor. These blocks are documentation/traceability artifacts and should remain stable even as we refactor the surrounding text to canonical names. """ blocks: list[str] = [] def repl(match: re.Match[str]) -> str: blocks.append(match.group(0)) return f"__IF_LEGACY_BLOCK_{len(blocks) - 1}__" return LEGACY_BLOCK_RE.sub(repl, text), blocks def _unshield_legacy_blocks(text: str, blocks: list[str]) -> str: for index, block in enumerate(blocks): text = text.replace(f"__IF_LEGACY_BLOCK_{index}__", block) return text def _build_legacy_map(registry: dict) -> dict[str, str]: names = registry.get("names", {}) or {} legacy_to_canonical: dict[str, str] = {} for canonical, meta in names.items(): legacy = (meta or {}).get("legacy", []) or [] for legacy_token in legacy: if legacy_token in legacy_to_canonical and legacy_to_canonical[legacy_token] != canonical: raise ValueError( f"legacy token {legacy_token!r} maps to multiple canonicals: " f"{legacy_to_canonical[legacy_token]!r} and {canonical!r}" ) legacy_to_canonical[legacy_token] = canonical return legacy_to_canonical def _safe_token_regex(token: str) -> re.Pattern[str]: # Replace token only when it’s not inside a larger identifier/path. # - Disallow alnum/_ on the left (word char) to avoid mid-word matches. # - Disallow alnum/_ or '/' or '-' on the right to avoid touching URLs, paths, file names, # or hyphenated “subtokens” like IF.guard-POC. return re.compile(rf"(? tuple[str, Counter[str]]: stats: Counter[str] = Counter() # 1) Remove visible scaffolding labels from headings. out_lines: list[str] = [] for raw in text.splitlines(True): line = raw.rstrip("\n") match = SCAFFOLD_HEADING_RE.match(line) if match: hashes, _, title = match.groups() out_lines.append(f"{hashes} {title}\n") stats["__scaffold_heading_renames__"] += 1 continue out_lines.append(raw) out = "".join(out_lines) # Shield `(legacy: ...)` blocks so they don't become nonsense like `(legacy: IF.GOV.PANEL)`. out, legacy_blocks = _shield_legacy_blocks(out) # 2) Apply legacy → canonical replacements. # Sort by length so longer tokens are replaced first (defensive; avoids any weird overlaps). legacy_items = sorted(legacy_map.items(), key=lambda kv: len(kv[0]), reverse=True) for legacy_token, canonical in legacy_items: if canonical in skip_canonicals: continue pattern = _safe_token_regex(legacy_token) out, n = pattern.subn(canonical, out) if n: stats[legacy_token] += n out = _unshield_legacy_blocks(out, legacy_blocks) return out, stats def main() -> int: parser = argparse.ArgumentParser(description="Apply IF naming refactor + remove scaffold headings.") parser.add_argument( "--registry", type=Path, default=Path(__file__).resolve().parents[1] / "IF_NAMING_REGISTRY.yaml", help="Path to IF_NAMING_REGISTRY.yaml", ) parser.add_argument( "--file", type=Path, default=Path(__file__).resolve().parents[1] / "DANNY_STOCKER_INFRAFABRIC_DOSSIER.md", help="Markdown file to refactor in-place", ) parser.add_argument( "--skip-canonical", action="append", default=["IF.AUDIT.TRAIL"], help="Canonical name(s) to skip when applying legacy replacements (repeatable).", ) args = parser.parse_args() registry = yaml.safe_load(args.registry.read_text(encoding="utf-8")) legacy_map = _build_legacy_map(registry) content = args.file.read_text(encoding="utf-8") updated, stats = refactor_text( content, legacy_map=legacy_map, skip_canonicals=set(args.skip_canonical or []) ) if updated == content: print("no changes") return 0 args.file.write_text(updated, encoding="utf-8") print(f"updated: {args.file}") if stats: print("changes:") for key, value in stats.most_common(): print(f" {key}: {value}") return 0 if __name__ == "__main__": raise SystemExit(main())