#!/usr/bin/env python3 from __future__ import annotations import re from collections import Counter from pathlib import Path import yaml IF_TOKEN_RE = re.compile(r"\bIF\.[A-Za-z0-9][A-Za-z0-9_.-]*\b") IF_LOWER_TOKEN_RE = re.compile(r"\bif\.[A-Za-z0-9][A-Za-z0-9_.-]*\b") def _render_table(counter: Counter[str]) -> str: lines = ["| Token | Count |", "|---|---:|"] for token, count in counter.most_common(): lines.append(f"| `{token}` | {count} |") return "\n".join(lines) + "\n" def main() -> int: repo_root = Path(__file__).resolve().parents[1] dossier = repo_root / "DANNY_STOCKER_INFRAFABRIC_DOSSIER.md" registry_path = repo_root / "IF_NAMING_REGISTRY.yaml" out_path = repo_root / "IF_MONIKERS_USED.md" text = dossier.read_text(encoding="utf-8") upper = Counter(IF_TOKEN_RE.findall(text)) lower = Counter(IF_LOWER_TOKEN_RE.findall(text)) forbidden: list[str] = [] if registry_path.exists(): registry = yaml.safe_load(registry_path.read_text(encoding="utf-8")) or {} forbidden = list(registry.get("forbidden", []) or []) forbidden_hits = [tok for tok in forbidden if tok and tok in text] parts: list[str] = [] parts.append(f"# IF.* / if.* monikers found in `{dossier.name}`\n") if forbidden_hits: parts.append("## Forbidden tokens (should be zero)\n") for tok in forbidden_hits: parts.append(f"- `{tok}`\n") parts.append("\n") parts.append("## IF.* (protocol/moniker tokens)\n") parts.append(f"Unique: {len(upper)}\n\n") parts.append(_render_table(upper)) parts.append("\n") parts.append("## if.* (lowercase tokens)\n") parts.append(f"Unique: {len(lower)}\n\n") parts.append(_render_table(lower)) out_path.write_text("".join(parts), encoding="utf-8") print(f"wrote: {out_path}") return 0 if __name__ == "__main__": raise SystemExit(main())