from __future__ import annotations import hashlib import json import re _EMOJI_RE = re.compile( "[" # best-effort emoji detection (not perfect) "\U0001F300-\U0001FAFF" # misc symbols & pictographs + extended "\u2600-\u27BF" # dingbats / misc symbols "]+" ) def lint_markdown(*, style_id: str, markdown: str) -> list[str]: require_mermaid = style_id.lower() in { "if.dave.v1.2", "if.dave.v1.3", "if.dave.v1.6", "if.dave.v1.7", "if.dave.v1.8", "if.dave.v1.9", "if.dave.v2.0", "if.dave.v2.1", "if.dave.v2.2", "if.dave.v2.3", "if.dave.fr.v1.2", "if.dave.fr.v1.3", "dave", "if://bible/dave/v1.2", "if://bible/dave/v1.3", "if://bible/dave/v1.6", "if://bible/dave/v1.7", "if://bible/dave/v1.8", "if://bible/dave/v1.9", "if://bible/dave/v2.0", "if://bible/dave/v2.1", "if://bible/dave/v2.2", "if://bible/dave/v2.3", "if://bible/dave/fr/v1.2", "if://bible/dave/fr/v1.3", } min_mermaid = ( 2 if style_id.lower() in { "if.dave.v1.7", "if://bible/dave/v1.7", "if.dave.v1.8", "if://bible/dave/v1.8", "if.dave.v1.9", "if://bible/dave/v1.9", "if.dave.v2.0", "if://bible/dave/v2.0", "if.dave.v2.1", "if://bible/dave/v2.1", "if.dave.v2.2", "if://bible/dave/v2.2", "if.dave.v2.3", "if://bible/dave/v2.3", } else (1 if require_mermaid else 0) ) if style_id.lower() in { "if.dave.v1", "if.dave.v1.1", "if.dave.v1.2", "if.dave.v1.3", "if.dave.v1.6", "if.dave.v1.7", "if.dave.v1.8", "if.dave.v1.9", "if.dave.v2.0", "if.dave.v2.1", "if.dave.v2.2", "if.dave.v2.3", "if.dave.fr.v1.2", "if.dave.fr.v1.3", "dave", "if://bible/dave/v1.0", "if://bible/dave/v1.1", "if://bible/dave/v1.2", "if://bible/dave/v1.3", "if://bible/dave/v1.6", "if://bible/dave/v1.7", "if://bible/dave/v1.8", "if://bible/dave/v1.9", "if://bible/dave/v2.0", "if://bible/dave/v2.1", "if://bible/dave/v2.2", "if://bible/dave/v2.3", "if://bible/dave/fr/v1.2", "if://bible/dave/fr/v1.3", }: return _lint_dave(markdown, source_text=None, min_mermaid=min_mermaid) return [f"Unknown style id: {style_id}"] def lint_markdown_with_source(*, style_id: str, markdown: str, source_text: str) -> list[str]: require_mermaid = style_id.lower() in { "if.dave.v1.2", "if.dave.v1.3", "if.dave.v1.6", "if.dave.v1.7", "if.dave.v1.8", "if.dave.v1.9", "if.dave.v2.0", "if.dave.v2.1", "if.dave.v2.2", "if.dave.v2.3", "if.dave.fr.v1.2", "if.dave.fr.v1.3", "dave", "if://bible/dave/v1.2", "if://bible/dave/v1.3", "if://bible/dave/v1.6", "if://bible/dave/v1.7", "if://bible/dave/v1.8", "if://bible/dave/v1.9", "if://bible/dave/v2.0", "if://bible/dave/v2.1", "if://bible/dave/v2.2", "if://bible/dave/v2.3", "if://bible/dave/fr/v1.2", "if://bible/dave/fr/v1.3", } min_mermaid = ( 2 if style_id.lower() in { "if.dave.v1.7", "if://bible/dave/v1.7", "if.dave.v1.8", "if://bible/dave/v1.8", "if.dave.v1.9", "if://bible/dave/v1.9", "if.dave.v2.0", "if://bible/dave/v2.0", "if.dave.v2.1", "if://bible/dave/v2.1", "if.dave.v2.2", "if://bible/dave/v2.2", "if.dave.v2.3", "if://bible/dave/v2.3", } else (1 if require_mermaid else 0) ) if style_id.lower() in { "if.dave.v1", "if.dave.v1.1", "if.dave.v1.2", "if.dave.v1.3", "if.dave.v1.6", "if.dave.v1.7", "if.dave.v1.8", "if.dave.v1.9", "if.dave.v2.0", "if.dave.v2.1", "if.dave.v2.2", "if.dave.v2.3", "if.dave.fr.v1.2", "if.dave.fr.v1.3", "dave", "if://bible/dave/v1.0", "if://bible/dave/v1.1", "if://bible/dave/v1.2", "if://bible/dave/v1.3", "if://bible/dave/v1.6", "if://bible/dave/v1.7", "if://bible/dave/v1.8", "if://bible/dave/v1.9", "if://bible/dave/v2.0", "if://bible/dave/v2.1", "if://bible/dave/v2.2", "if://bible/dave/v2.3", "if://bible/dave/fr/v1.2", "if://bible/dave/fr/v1.3", }: return _lint_dave(markdown, source_text=source_text, min_mermaid=min_mermaid) return [f"Unknown style id: {style_id}"] def _lint_dave(md: str, *, source_text: str | None, min_mermaid: int) -> list[str]: issues: list[str] = [] if "InfraFabric Red Team Footer" not in md: issues.append("Missing required footer: InfraFabric Red Team Footer") if "Standard Dave Footer" not in md: issues.append("Missing required footer: Standard Dave Footer") allowed_emojis = set(_EMOJI_RE.findall(source_text or "")) present_emojis = set(_EMOJI_RE.findall(md)) disallowed = sorted(present_emojis - allowed_emojis) if disallowed: issues.append( "Contains emoji not present in source: " + " ".join(disallowed[:10]) + (" ..." if len(disallowed) > 10 else "") ) mermaid_count = len(re.findall(r"```mermaid\b", md)) if min_mermaid and mermaid_count < min_mermaid: issues.append(f"Missing required Mermaid diagrams: expected>={min_mermaid} got={mermaid_count}") issues.extend(_lint_duplicate_mermaid(md)) issues.extend(_lint_duplicate_dave_factor(md)) issues.extend(_lint_json_blocks(md)) issues.extend(_lint_repeated_lines(md)) return issues def _stable_hash(text: str) -> str: return hashlib.sha256(text.encode("utf-8", errors="replace")).hexdigest() def _lint_duplicate_mermaid(md: str) -> list[str]: blocks = re.findall(r"```mermaid\s*([\s\S]*?)```", md, flags=re.MULTILINE) if len(blocks) <= 1: return [] counts: dict[str, int] = {} for block in blocks: normalized = "\n".join([ln.rstrip() for ln in str(block).strip().splitlines() if ln.strip()]) if not normalized: continue h = _stable_hash(normalized) counts[h] = counts.get(h, 0) + 1 issues: list[str] = [] for h, n in sorted(counts.items(), key=lambda kv: (-kv[1], kv[0])): if n > 1: issues.append(f"Duplicate Mermaid diagram appears {n} times (sha256:{h[:12]})") return issues def _lint_duplicate_dave_factor(md: str) -> list[str]: lines = md.splitlines() blocks: list[str] = [] i = 0 while i < len(lines): line = lines[i].rstrip() if line.startswith("> **The Dave Factor:**"): block_lines = [line.strip()] if i + 1 < len(lines) and lines[i + 1].rstrip().startswith("> **Countermeasure:**"): block_lines.append(lines[i + 1].strip()) i += 1 blocks.append("\n".join(block_lines)) i += 1 if len(blocks) <= 1: return [] counts: dict[str, int] = {} for block in blocks: h = _stable_hash(block.strip()) counts[h] = counts.get(h, 0) + 1 issues: list[str] = [] for h, n in sorted(counts.items(), key=lambda kv: (-kv[1], kv[0])): if n > 1: issues.append(f"Duplicate Dave Factor callout appears {n} times (sha256:{h[:12]})") return issues def _lint_json_blocks(md: str) -> list[str]: blocks = re.findall(r"```json\s*([\s\S]*?)```", md, flags=re.MULTILINE) issues: list[str] = [] for idx, raw in enumerate(blocks, 1): text = str(raw).strip() if not text: continue try: json.loads(text) except Exception as e: issues.append(f"Invalid JSON code block #{idx}: {e}") return issues def _lint_repeated_lines(md: str) -> list[str]: lines = md.splitlines() counts: dict[str, int] = {} in_fence = False fence = None for ln in lines: stripped = ln.strip() if stripped.startswith("```"): if not in_fence: in_fence = True fence = stripped else: in_fence = False fence = None continue if in_fence: continue if not stripped: continue if stripped.startswith("#"): continue if stripped.startswith(">"): continue # Action Pack backlog uses consistent acceptance criteria by design. if stripped.startswith("- Acceptance:"): continue if len(stripped) < 18: continue counts[stripped] = counts.get(stripped, 0) + 1 issues: list[str] = [] for line, n in sorted(counts.items(), key=lambda kv: (-kv[1], kv[0])): if n >= 3: issues.append(f"Repeated line appears {n} times: {line[:120]}{'…' if len(line) > 120 else ''}") return issues