# re-voice/src/revoice/lint.py

from __future__ import annotations

import re


_EMOJI_RE = re.compile(
    "["  # best-effort emoji detection (not perfect)
    "\U0001F300-\U0001FAFF"  # misc symbols & pictographs + extended
    "\u2600-\u27BF"  # dingbats / misc symbols
    "]+"
)


def lint_markdown(*, style_id: str, markdown: str) -> list[str]:
    """Lint *markdown* against the style identified by *style_id*.

    The style id is compared case-insensitively. The Dave v1 style is
    reachable under any of its aliases; any other id is not linted and is
    reported as a single "Unknown style id" issue instead.

    Args:
        style_id: Identifier (or alias) of the style to check against.
        markdown: The markdown document text to lint.

    Returns:
        A list of human-readable issue messages.
    """
    dave_aliases = ("if.dave.v1", "dave", "if://bible/dave/v1.0")
    normalized_id = style_id.lower()

    # Guard clause: bail out early for anything that is not a Dave alias.
    if normalized_id not in dave_aliases:
        return [f"Unknown style id: {style_id}"]

    return _lint_dave_v1(markdown)


def _lint_dave_v1(md: str) -> list[str]:
    """Check *md* against the Dave v1 style rules and return any violations.

    Rules, in the order they are reported:

    1. The text ``Standard Dave Footer`` must appear somewhere in the document.
    2. Every paragraph outside fenced code blocks must contain at least one
       emoji (as detected by ``_EMOJI_RE``); horizontal rules are exempt.
    3. The standalone word ``I`` (first-person singular) must not appear
       anywhere in the document, fenced code blocks included.

    Args:
        md: The full markdown document text.

    Returns:
        A list of human-readable issue messages; empty when no rule is broken.
    """
    issues: list[str] = []

    if "Standard Dave Footer" not in md:
        issues.append("Missing required footer: Standard Dave Footer")

    # Strip fenced code blocks first so code is not required to carry emoji.
    # re.S lets ``.`` span newlines so multi-line fences are removed whole.
    md_wo_code = re.sub(r"```.*?```", "", md, flags=re.S)
    paragraphs = _split_paragraphs(md_wo_code)
    for idx, para in enumerate(paragraphs, start=1):
        # Horizontal rules (---, ***, ___) still count toward the paragraph
        # numbering but are not expected to contain an emoji.
        if re.match(r"^(-{3,}|\*{3,}|_{3,})$", para.strip()):
            continue
        if not _EMOJI_RE.search(para):
            issues.append(f"Paragraph {idx} missing emoji")

    # Word-boundary match on a standalone capital "I". In a raw string the
    # boundary must be written ``\b``; ``\\b`` would instead match a literal
    # backslash followed by "b", so the check would never fire on prose.
    if re.search(r"\bI\b", md):
        issues.append('Contains disallowed first-person singular ("I")')

    return issues


def _split_paragraphs(md: str) -> list[str]:
    blocks: list[str] = []
    current: list[str] = []

    for line in md.splitlines():
        if line.strip() == "":
            if current:
                blocks.append("\n".join(current).strip())
                current = []
            continue

        current.append(line)

    if current:
        blocks.append("\n".join(current).strip())

    return [b for b in blocks if b]