re-voice/src/revoice/lint.py

56 lines
1.5 KiB
Python

from __future__ import annotations
import re
# Best-effort emoji detection (not perfect). Matches a run of one or more
# code points drawn from either of these ranges:
#   U+1F300-U+1FAFF  misc symbols & pictographs + extended
#   U+2600-U+27BF    dingbats / misc symbols
_EMOJI_RE = re.compile("[\U0001F300-\U0001FAFF\u2600-\u27BF]+")
def lint_markdown(*, style_id: str, markdown: str) -> list[str]:
    """Lint *markdown* against the style named by *style_id*.

    Args:
        style_id: Style identifier, compared case-insensitively against the
            known aliases of the Dave v1 style.
        markdown: The Markdown text to check.

    Returns:
        The list of issue messages produced by the matching linter, or a
        single-item list reporting an unknown style id.
    """
    dave_v1_aliases = ("if.dave.v1", "dave", "if://bible/dave/v1.0")
    normalized_id = style_id.lower()

    # Guard clause: anything that is not a recognised alias is reported back.
    if normalized_id not in dave_v1_aliases:
        return [f"Unknown style id: {style_id}"]

    return _lint_dave_v1(markdown)
# Fenced code blocks (``` ... ```), matched non-greedily across newlines.
_CODE_FENCE_RE = re.compile(r"```.*?```", flags=re.S)
# A paragraph that is nothing but a Markdown horizontal rule: ---, *** or ___.
_HRULE_RE = re.compile(r"^(-{3,}|\*{3,}|_{3,})$")
# The standalone word "I". The backslashes must be single inside a raw string:
# r"\\bI\\b" would look for the literal text \bI\b instead of word boundaries.
_FIRST_PERSON_RE = re.compile(r"\bI\b")


def _lint_dave_v1(md: str) -> list[str]:
    """Check *md* against the Dave v1 style rules.

    Rules applied:
      * the text must contain the phrase "Standard Dave Footer";
      * every paragraph outside fenced code blocks must contain at least one
        emoji, except paragraphs that are only a horizontal rule;
      * the standalone word "I" must not appear anywhere in the document.

    Args:
        md: The Markdown document to lint.

    Returns:
        A list of human-readable issue messages; empty when nothing is flagged.
    """
    issues: list[str] = []

    if "Standard Dave Footer" not in md:
        issues.append("Missing required footer: Standard Dave Footer")

    # Fenced code is removed first so it is not held to the emoji rule.
    md_wo_code = _CODE_FENCE_RE.sub("", md)

    # Numbering is 1-based and includes horizontal-rule paragraphs, so the
    # reported index is the paragraph's position in the code-stripped text.
    for idx, para in enumerate(_split_paragraphs(md_wo_code), start=1):
        if _HRULE_RE.match(para.strip()):
            continue
        if not _EMOJI_RE.search(para):
            issues.append(f"Paragraph {idx} missing emoji")

    # Searched in the full document (fenced code included), as before.
    if _FIRST_PERSON_RE.search(md):
        issues.append('Contains disallowed first-person singular ("I")')

    return issues
def _split_paragraphs(md: str) -> list[str]:
    """Split *md* into paragraphs separated by blank lines.

    A line counts as blank when it is empty or whitespace-only. Each run of
    consecutive non-blank lines becomes one paragraph: the lines are joined
    with "\\n" and the result is stripped of surrounding whitespace.

    Args:
        md: The text to split.

    Returns:
        The paragraphs in document order; an empty list if there are none.
    """
    paragraphs: list[str] = []
    pending: list[str] = []

    def flush() -> None:
        # Emit the lines gathered so far as one paragraph, then start afresh.
        text = "\n".join(pending).strip()
        if text:
            paragraphs.append(text)
        pending.clear()

    for raw_line in md.splitlines():
        if raw_line.strip():
            pending.append(raw_line)
        else:
            flush()

    # The final paragraph has no blank line after it to trigger a flush.
    flush()
    return paragraphs