Infrafabric-POC-docs/tools/generate_monikers_used.py

65 lines
1.9 KiB
Python

#!/usr/bin/env python3
from __future__ import annotations
import re
from collections import Counter
from pathlib import Path
import yaml
IF_TOKEN_RE = re.compile(r"\bIF\.[A-Za-z0-9][A-Za-z0-9_.-]*\b")
IF_LOWER_TOKEN_RE = re.compile(r"\bif\.[A-Za-z0-9][A-Za-z0-9_.-]*\b")
def _render_table(counter: Counter[str]) -> str:
lines = ["| Token | Count |", "|---|---:|"]
for token, count in counter.most_common():
lines.append(f"| `{token}` | {count} |")
return "\n".join(lines) + "\n"
def main() -> int:
    """Scan the dossier for ``IF.*`` / ``if.*`` tokens and write a Markdown report.

    The repository root is taken to be the parent of the directory containing
    this script. The dossier ``DANNY_STOCKER_INFRAFABRIC_DOSSIER.md`` is read
    from there, every match of ``IF_TOKEN_RE`` and ``IF_LOWER_TOKEN_RE`` is
    counted, and the result is written to ``IF_MONIKERS_USED.md`` as one table
    per pattern. When ``IF_NAMING_REGISTRY.yaml`` exists, the entries under its
    ``forbidden`` key that occur as substrings of the dossier are listed in a
    leading "Forbidden tokens" section.

    Returns:
        0 once the report has been written; 1 if the dossier does not exist
        (an error line is printed to stderr and no report is written).
    """
    import sys  # local import: only used by the missing-dossier error path

    repo_root = Path(__file__).resolve().parents[1]
    dossier = repo_root / "DANNY_STOCKER_INFRAFABRIC_DOSSIER.md"
    registry_path = repo_root / "IF_NAMING_REGISTRY.yaml"
    out_path = repo_root / "IF_MONIKERS_USED.md"

    try:
        text = dossier.read_text(encoding="utf-8")
    except FileNotFoundError:
        # Report the problem in one line instead of an unhandled traceback.
        print(f"error: dossier not found: {dossier}", file=sys.stderr)
        return 1

    upper = Counter(IF_TOKEN_RE.findall(text))
    lower = Counter(IF_LOWER_TOKEN_RE.findall(text))

    forbidden: list[str] = []
    if registry_path.exists():
        registry = yaml.safe_load(registry_path.read_text(encoding="utf-8")) or {}
        # A registry whose top level is not a mapping has no "forbidden" key.
        raw = registry.get("forbidden") if isinstance(registry, dict) else None
        if isinstance(raw, str):
            # A scalar entry would otherwise be split into single characters.
            raw = [raw]
        # Coerce to str so non-string YAML scalars cannot break the `in` test
        # below; falsy entries (empty string, None, 0) are dropped.
        forbidden = [str(tok) for tok in (raw or []) if tok]
    forbidden_hits = [tok for tok in forbidden if tok in text]

    parts: list[str] = [f"# IF.* / if.* monikers found in `{dossier.name}`\n"]
    if forbidden_hits:
        parts.append("## Forbidden tokens (should be zero)\n")
        parts.extend(f"- `{tok}`\n" for tok in forbidden_hits)
        parts.append("\n")
    parts.extend(
        [
            "## IF.* (protocol/moniker tokens)\n",
            f"Unique: {len(upper)}\n\n",
            _render_table(upper),
            "\n",
            "## if.* (lowercase tokens)\n",
            f"Unique: {len(lower)}\n\n",
            _render_table(lower),
        ]
    )

    out_path.write_text("".join(parts), encoding="utf-8")
    print(f"wrote: {out_path}")
    return 0
if __name__ == "__main__":
    # Run only when executed as a script; main()'s return value becomes the
    # process exit status.
    exit_code = main()
    raise SystemExit(exit_code)