re-voice/tools/mermaid/mermaid-self-heal.js

315 lines
9.6 KiB
JavaScript
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env node
/**
* Mermaid Self-Healing Pipeline (user-provided "95%+ reliability" edition)
*
* Usage:
* node tools/mermaid/mermaid-self-heal.js <file-or-dir> [...]
*
* Notes:
* - Edits Markdown files in-place, rewriting ```mermaid fences.
* - If `mmdc` (mermaid-cli) is available in PATH, it is used for validation.
* - If `mmdc` is missing, the script still applies repairs but skips validation.
*/
const fs = require("fs");
const path = require("path");
const os = require("os");
const { execSync } = require("child_process");
// Regex source fragments, one per bracketed node-shape syntax. Each trailing
// comment shows the literal text the fragment matches and, where one exists,
// the shape Mermaid's flowchart syntax assigns to that delimiter pair.
// NOTE(review): neither SHAPES nor SHAPE_REGEX is referenced by any other code
// visible in this file.
const SHAPES = [
"\\[\\[([^\\]]+)\\]\\]", // [[label]] -> subroutine
"\\[\\(\\([^\\)]+\\)\\)\\]", // [((label))], label not captured; NOTE(review): Mermaid's cylinder is [(label)] - confirm intent
"\\[\\(/([^\\)]+)\\)\\]\\]", // [(/label)]]; NOTE(review): does not look like a documented Mermaid shape - confirm intent
"\\[([^\\]]+)\\]", // [label] -> rectangle (default)
"\\(\\(([^\\)]+)\\)\\)", // ((label)) -> circle
"\\(\\{([^\\}]+)\\}\\)", // ({label}); NOTE(review): Mermaid's rhombus/diamond is {label} - confirm intent
"\\(\\[([^\\]]+)\\]\\)", // ([label]) -> stadium
"\\[\\/([^\\]]+)\\/\\]", // [/label/] -> parallelogram
"\\[\\\\([^\\]]+)\\\\\\]", // [\label\] -> alt parallelogram
"\\{\\{([^\\}]+)\\}\\}", // {{label}} -> hexagon
"\\(\\{([^\\}]+)\\}\\)", // ({label}); same pattern as the sixth entry (duplicate)
"\\(\\(([^\\)]+)\\)\\)", // ((label)); same pattern as the fifth entry; NOTE(review): Mermaid's double circle is (((label)))
];
// Single alternation over all fragments; every fragment is wrapped in its own
// capturing group, and alternatives listed first win when several could match
// at the same position.
const SHAPE_REGEX = new RegExp(SHAPES.map((s) => `(${s})`).join("|"));
/**
 * Normalise the raw text of a mermaid fence before any structural repair.
 *
 * - non-breaking spaces become ordinary spaces (deleting them, as an earlier
 *   revision did, glued neighbouring words together: "Hello\u00A0World" ->
 *   "HelloWorld");
 * - zero-width / directional marks (U+200B, U+200E, U+FEFF, U+2060) are removed;
 * - CRLF and lone CR become LF, each tab becomes one space;
 * - surrounding whitespace is trimmed and exactly one trailing newline is added.
 *
 * Because the text is trimmed, its first line is always the first line with
 * content, so no separate "move the header up" step is needed here.
 *
 * @param {*} raw - Fence contents; falsy values are treated as an empty string.
 * @returns {string} The cleaned text, always ending in "\n".
 */
function sanitizeAndNormalize(raw) {
  const cleaned = String(raw || "")
    .replace(/\u00A0/g, " ") // NBSP is visible spacing, keep it as a space
    .replace(/[\u200B\u200E\uFEFF\u2060]/g, "") // truly invisible characters
    .replace(/\r\n?/g, "\n")
    .replace(/\t/g, " ")
    .trim();
  return `${cleaned}\n`;
}
/**
 * Coerce an arbitrary token into an identifier matching
 * /^[A-Za-z_][A-Za-z0-9_]*$/.
 *
 * Already-valid identifiers are returned unchanged. Otherwise every other
 * character becomes "_", leading/trailing underscores are stripped, an empty
 * result falls back to "node", and a leading digit is prefixed with "_".
 *
 * The input is converted to a string up front so the function always returns
 * a string: previously `null`, `undefined` and booleans slipped through the
 * validity test (RegExp#test coerces them to "null", "undefined", ...) and
 * were returned as non-strings.
 *
 * Note that distinct inputs can map to the same id ("a-b" and "a.b" both
 * become "a_b"); callers that need uniqueness must handle that themselves.
 *
 * @param {*} id - Candidate identifier; null/undefined are treated as empty.
 * @returns {string} A syntactically safe identifier.
 */
function forceValidId(id) {
  const text = id == null ? "" : String(id);
  if (/^[A-Za-z_][A-Za-z0-9_]*$/.test(text)) return text;

  const cleaned = text.replace(/[^A-Za-z0-9_]/g, "_").replace(/^_+|_+$/g, "");
  if (!cleaned) return "node";
  return /^\d/.test(cleaned) ? `_${cleaned}` : cleaned;
}
/**
 * Return a label in a form that is safe to place inside a node shape.
 *
 * Labels made only of word characters, whitespace (except newlines) and
 * `. , - – —` are returned untouched. Anything else is wrapped in double
 * quotes, with embedded quotes written as the entity `#34;` and newlines as
 * the two characters `\n`.
 *
 * @param {*} label - Label text; falsy values are treated as an empty string.
 * @returns {string} The original label, or a double-quoted escaped version.
 */
function quoteLabel(label) {
  const text = String(label || "");

  const onlySafeChars = /^[\w\s.,\-–—]+$/.test(text);
  const hasNewline = text.includes("\n");
  const hasReservedChar = /[":|]/.test(text);
  if (onlySafeChars && !hasNewline && !hasReservedChar) {
    return text;
  }

  const escaped = text.replace(/"/g, "#34;").replace(/\n/g, "\\n");
  return `"${escaped}"`;
}
/**
 * Repair node ids and rectangle labels in flowchart source.
 *
 * Pass 1 sanitises the identifier that starts a line and is followed by a
 * shape bracket (via `forceValidId`). It deliberately skips:
 *   - `%%` lines (comments and `%%{init: ...}%%` directives), which used to be
 *     rewritten to `node{init: ...`;
 *   - tokens that contain a link operator or the asymmetric-shape `>`
 *     (`A-->B[Label]`, `A>flag]`), which used to be collapsed into a single
 *     id such as `A___B[Label]`, destroying the edge.
 *
 * Pass 2 wraps the label of plain `id[label]` rectangles in double quotes
 * (embedded quotes become `#34;`). Labels that are already quoted, and
 * compound shapes that merely start with `[` - `[[sub]]`, `[(db)]`, `[/in/]`,
 * `[\out\]`, `[/trap\]` - are left exactly as written instead of being turned
 * into rectangles with stray delimiters in their text.
 *
 * Known limitation: an id rewritten in pass 1 is only rewritten at that
 * definition; other references to the old spelling are not updated.
 *
 * @param {string} code - Flowchart source.
 * @returns {string} The repaired source.
 */
function repairNodesAndLabels(code) {
  // Any of these inside the leading token means it is an edge expression
  // (or an asymmetric `id>label]` node), not a bare identifier.
  const EDGE_OPERATOR = /--|==|~~|-\.|\.-|[<>|&]/;

  const escapeLabel = (s) => String(s || "").replace(/"/g, "#34;").replace(/\n/g, "\\n");

  const alreadyQuoted = (s) => {
    const t = String(s || "").trim();
    return t.length >= 2 && t.startsWith('"') && t.endsWith('"');
  };

  // `label` is the text between the first "[" and the first "]"; `nextChar`
  // is the character right after that "]".
  const isCompoundShape = (label, nextChar) => {
    if (label.length === 0) return false;
    const first = label[0];
    const last = label[label.length - 1];
    if (first === "[") return nextChar === "]"; // [[subroutine]]
    if (label.length < 2) return false;
    if (first === "(") {
      // [(cylinder)] - but "(a) and (b)" is ordinary label text.
      return last === ")" && !/[()]/.test(label.slice(1, -1));
    }
    if (first === "/" || first === "\\") return last === "/" || last === "\\";
    return false;
  };

  // Pass 1: sanitise the identifier that opens a node definition.
  let repaired = String(code || "").replace(
    /^(\s*)([^\s\[\](){}]+)(\s*[[\](){}])/gm,
    (match, indent, id, shape) => {
      if (id.startsWith("%%") || EDGE_OPERATOR.test(id)) return match;
      return `${indent}${forceValidId(id)}${shape}`;
    },
  );

  // Pass 2: quote plain rectangle labels.
  repaired = repaired.replace(
    /(\b[^\s\[\](){}]+)\[([^\]\n]*)\]/g,
    (match, id, label, offset, whole) => {
      if (alreadyQuoted(label)) return match;
      if (isCompoundShape(label, whole[offset + match.length])) return match;
      return `${id}["${escapeLabel(label)}"]`;
    },
  );

  return repaired;
}
/**
 * Classify a diagram by its header line.
 *
 * The header is the first line that is not blank, not part of a leading YAML
 * frontmatter block (`---` ... `---`), and not a `%%` comment or
 * `%%{ ... }%%` directive (single- or multi-line). Looking only at the very
 * first line, as before, misclassified every diagram that starts with an init
 * directive as a flowchart.
 *
 * @param {*} code - Diagram source; falsy values are treated as empty.
 * @returns {string} One of "sequence", "class", "state", "gantt", "er",
 *   "pie", "gitgraph", "mindmap", "timeline", "quadrantchart", "xychart", or
 *   "flowchart" when nothing else matches.
 */
function detectType(code) {
  // Checked in order against the lower-cased header line.
  const TYPES = [
    ["sequencediagram", "sequence"],
    ["classdiagram", "class"],
    ["statediagram", "state"],
    ["gantt", "gantt"],
    ["erdiagram", "er"],
    ["pie", "pie"],
    ["gitgraph", "gitgraph"],
    ["mindmap", "mindmap"],
    ["timeline", "timeline"],
    ["quadrantchart", "quadrantchart"],
    ["xychart", "xychart"],
  ];

  const lines = String(code || "").split("\n");
  let i = 0;
  while (i < lines.length && !lines[i].trim()) i++;

  // Skip a leading frontmatter block, but only if it is properly closed.
  if (i < lines.length && lines[i].trim() === "---") {
    const close = lines.findIndex((l, idx) => idx > i && l.trim() === "---");
    if (close !== -1) i = close + 1;
  }

  // Skip blank lines, comments and directives.
  while (i < lines.length) {
    const t = lines[i].trim();
    if (t.startsWith("%%{") && !t.includes("}%%")) {
      // Multi-line directive: resume after the line that closes it.
      const close = lines.findIndex((l, idx) => idx > i && l.includes("}%%"));
      i = close === -1 ? i + 1 : close + 1;
    } else if (t === "" || t.startsWith("%%")) {
      i++;
    } else {
      break;
    }
  }

  const header = String(lines[i] || "").toLowerCase();
  const hit = TYPES.find(([needle]) => header.includes(needle));
  return hit ? hit[1] : "flowchart";
}
/**
 * Repairs specific to sequence diagrams.
 *
 * 1. `participant` / `actor` declarations are hoisted so they directly follow
 *    the `sequenceDiagram` header (or sit at the very top when there is no
 *    header). Their ids are sanitised with `forceValidId`, an `as <alias>`
 *    suffix is preserved, and repeated declarations of the same id are
 *    dropped. Declarations inside a `box ... end` group stay where they are.
 * 2. Unclosed `alt`/`loop`/`par`/`opt`/`critical`/`break`/`rect`/`box` blocks
 *    get the missing `end` lines appended.
 *
 * Fixes relative to the earlier revision: declarations were placed *above*
 * the header line, aliases were discarded, `startsWith("par")` counted every
 * `participant` line as an open block, `else` was counted as a block opener
 * (so balanced alt/else/end gained a stray `end`), and only the literal
 * `rect rgb(0,0,0)` was recognised.
 *
 * Known limitation: messages that refer to a participant by a name that had
 * to be sanitised are not rewritten.
 *
 * @param {*} code - Sequence diagram source; falsy values are treated as empty.
 * @returns {string} The repaired source.
 */
function sequenceSpecificFixes(code) {
  // A block keyword must be a whole word at the start of the statement.
  const BLOCK_OPENER = /^(alt|loop|par|opt|critical|break|rect|box)(\s|$)/;

  const declaredIds = new Set();
  const declarations = [];
  const body = [];
  let insideBox = false;

  for (const line of String(code || "").split("\n")) {
    const trimmed = line.trim();

    // Participants grouped in a box must not be pulled out of it.
    if (insideBox) {
      if (trimmed === "end") insideBox = false;
      body.push(line);
      continue;
    }
    if (/^box(\s|$)/.test(trimmed)) {
      insideBox = true;
      body.push(line);
      continue;
    }

    const decl = line.match(/^\s*(participant|actor)\s+(.+)/i);
    if (!decl) {
      body.push(line);
      continue;
    }

    const spec = decl[2].trim();
    const aliased = spec.match(/^(.+?)\s+as\s+(.+)$/);
    const id = forceValidId((aliased ? aliased[1] : spec).trim());
    if (declaredIds.has(id)) continue;
    declaredIds.add(id);

    const keyword = decl[1].toLowerCase();
    declarations.push(
      aliased ? `${keyword} ${id} as ${aliased[2].trim()}` : `${keyword} ${id}`,
    );
  }

  // Declarations go right after the header so the header stays first.
  const headerIdx = body.findIndex((l) => /^\s*sequenceDiagram\b/i.test(l));
  const insertAt = headerIdx === -1 ? 0 : headerIdx + 1;
  const merged = [...body.slice(0, insertAt), ...declarations, ...body.slice(insertAt)];

  let open = 0;
  for (const line of merged) {
    const trimmed = line.trim();
    if (BLOCK_OPENER.test(trimmed)) open++;
    else if (trimmed === "end" && open > 0) open--;
  }
  if (open === 0) return merged.join("\n");

  // Append the missing closers before any trailing blank lines so an existing
  // final newline is preserved.
  let tail = merged.length;
  while (tail > 0 && !merged[tail - 1].trim()) tail--;
  return [...merged.slice(0, tail), ...Array(open).fill("end"), ...merged.slice(tail)].join("\n");
}
/**
 * Append the `end` lines needed to close any `subgraph` left open.
 *
 * Only real statements are counted: `subgraph` must start a statement and
 * `end` must be a whole statement, both in lower case (Mermaid's own docs
 * recommend capitalising "End" precisely so it is not read as the keyword).
 * Double-quoted text is ignored and `;` is treated as a statement separator.
 * The earlier revision matched the words anywhere in a line and
 * case-insensitively, so a node called `End` or a label such as "the end"
 * cancelled a missing closer, and a label mentioning "subgraph" added a
 * bogus one.
 *
 * A surplus `end` never drives the depth below zero and is left in place.
 * Missing closers are inserted before trailing blank lines so an existing
 * final newline survives.
 *
 * @param {*} code - Flowchart source; falsy values are treated as empty.
 * @returns {string} The source with every subgraph closed.
 */
function balanceSubgraphs(code) {
  const lines = String(code || "").split("\n");

  let depth = 0;
  for (const line of lines) {
    // Blank out quoted text so keywords and ";" inside labels are ignored.
    const statements = line.replace(/"[^"]*"/g, '""').split(";");
    for (const raw of statements) {
      const statement = raw.trim();
      if (/^subgraph(\s|$)/.test(statement)) depth++;
      else if (statement === "end") depth = Math.max(0, depth - 1);
    }
  }
  if (depth === 0) return lines.join("\n");

  let tail = lines.length;
  while (tail > 0 && !lines[tail - 1].trim()) tail--;
  return [...lines.slice(0, tail), ...Array(depth).fill("end"), ...lines.slice(tail)].join("\n");
}
/**
 * Rebuild a diagram so that its type header sits directly below any preamble.
 *
 * Output layout:
 *   1. the preamble, kept in its original order: an optional YAML frontmatter
 *      block (`---` ... `---`) followed by any leading `%%{ ... }%%`
 *      directives, which may span several lines;
 *   2. the header line (the first line starting with a known diagram keyword);
 *      `graph` is rewritten to `flowchart`, a flowchart header without a
 *      direction becomes `flowchart TD`, and `flowchart TD` is used when no
 *      header exists at all;
 *   3. every remaining non-blank line, in order.
 *
 * Compared with the earlier revision this keeps frontmatter ahead of the
 * header (Mermaid requires it first), keeps multi-line directives intact, and
 * recognises more diagram headers (journey, requirementDiagram, C4*, sankey,
 * block, packet, kanban, architecture, zenuml) so they no longer get a
 * `flowchart TD` line forced on top of them.
 *
 * @param {*} code - Diagram source; falsy values are treated as empty.
 * @returns {string} The reordered source, ending in exactly one "\n".
 */
function ensureHeaderAtTop(code) {
  const lines = String(code || "").replace(/\r\n?/g, "\n").split("\n");
  const headerRe =
    /^(flowchart|graph|sequenceDiagram|classDiagram|stateDiagram(?:-v2)?|gantt|ganttChart|erDiagram|pie|gitgraph|mindmap|timeline|quadrantChart|xychart-beta|xychart|journey|requirementDiagram|C4Context|C4Container|C4Component|C4Dynamic|C4Deployment|sankey-beta|block-beta|packet-beta|kanban|architecture-beta|zenuml)\b/i;
  const isBlank = (l) => !String(l || "").trim();

  const preamble = [];
  let cursor = 0;
  const skipBlanks = () => {
    while (cursor < lines.length && isBlank(lines[cursor])) cursor++;
  };

  // Frontmatter: only honoured when it opens the diagram and is closed.
  skipBlanks();
  if (cursor < lines.length && lines[cursor].trim() === "---") {
    const close = lines.findIndex((l, i) => i > cursor && l.trim() === "---");
    if (close !== -1) {
      for (const l of lines.slice(cursor, close + 1)) {
        // Keep YAML indentation; normalise only the delimiter lines.
        preamble.push(l.trim() === "---" ? "---" : l.trimEnd());
      }
      cursor = close + 1;
      skipBlanks();
    }
  }

  // Leading directives; an unterminated one is treated as a single line.
  while (cursor < lines.length && lines[cursor].trim().startsWith("%%{")) {
    const close = lines.findIndex((l, i) => i >= cursor && l.includes("}%%"));
    const last = close === -1 ? cursor : close;
    for (const l of lines.slice(cursor, last + 1)) {
      if (!isBlank(l)) preamble.push(l.trim());
    }
    cursor = last + 1;
    skipBlanks();
  }

  let headerIdx = -1;
  for (let i = cursor; i < lines.length; i++) {
    if (headerRe.test(lines[i].trim())) {
      headerIdx = i;
      break;
    }
  }

  let headerLine = headerIdx >= 0 ? lines[headerIdx].trim() : "flowchart TD";
  headerLine = headerLine.replace(/^graph\b/i, "flowchart");
  if (/^flowchart\b/i.test(headerLine) && !/\b(LR|RL|TD|TB|BT)\b/i.test(headerLine)) {
    headerLine = "flowchart TD";
  }

  const body = lines.filter((l, i) => i >= cursor && i !== headerIdx && !isBlank(l));
  return [...preamble, headerLine, ...body].join("\n").trim() + "\n";
}
/**
 * Run the full repair pipeline on the contents of one mermaid fence.
 *
 * Steps: character/newline clean-up, header placement, flowchart-only node
 * and subgraph repairs, then arrow normalisation.
 *
 * Arrow normalisation only re-joins arrows that were split by whitespace
 * (`- ->` to `-->`, `= =>` to `==>`). The earlier rule replaced every run of
 * two or more `=` with `==>`, which turned each valid `==>` into `==>>` and
 * each `===` link into an arrow; the companion `-.` rule replaced the text
 * with itself and has been dropped.
 *
 * @param {*} block - Raw text found between the fence markers.
 * @returns {string} The repaired diagram source.
 */
function selfHealMermaid(block) {
  let code = ensureHeaderAtTop(sanitizeAndNormalize(block));

  if (detectType(code) === "flowchart") {
    code = balanceSubgraphs(repairNodesAndLabels(code));
  }

  return code.replace(/-\s+->/g, "-->").replace(/=\s+=>/g, "==>");
}
/**
 * Report whether a command can be resolved by the shell.
 *
 * Runs `command -v <cmd>` with all stdio discarded; any failure (non-zero
 * exit status or an error while spawning) counts as "not available".
 *
 * @param {string} cmd - Command name; interpolated into the shell line as-is.
 * @returns {boolean} true when the lookup succeeded, false otherwise.
 */
function hasCmd(cmd) {
  let available = true;
  try {
    execSync(`command -v ${cmd}`, { stdio: "ignore" });
  } catch {
    available = false;
  }
  return available;
}
/**
 * Validate diagram source by rendering it with mermaid-cli (`mmdc`).
 *
 * The text is written to a private temporary directory and rendered to an
 * SVG inside that same directory. mermaid-cli only accepts output paths that
 * end in .svg/.png/.pdf/.md/.markdown, so the earlier `-o /dev/null` made
 * every run fail regardless of the diagram. The directory, including the
 * rendered file, is removed afterwards even when writing the input fails.
 *
 * @param {string} inputMmdText - Diagram source to validate.
 * @returns {{ok: (boolean|null), stderr: string}}
 *   `ok: null` with `stderr: "mmdc_not_found"` when mmdc is unavailable,
 *   `ok: true` when rendering succeeded, otherwise `ok: false` with the
 *   captured stderr (or the error message, or "mmdc_failed").
 * @throws If the temporary directory or input file cannot be created.
 */
function validateWithMmdc(inputMmdText) {
  if (!hasCmd("mmdc")) return { ok: null, stderr: "mmdc_not_found" };

  const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "mmdc-heal-"));
  try {
    const inFile = path.join(tmpDir, "temp.mmd");
    const outFile = path.join(tmpDir, "temp.svg");
    fs.writeFileSync(inFile, inputMmdText, "utf8");

    try {
      execSync(`mmdc -i ${JSON.stringify(inFile)} -o ${JSON.stringify(outFile)} --quiet`, {
        stdio: "pipe",
      });
      return { ok: true, stderr: "" };
    } catch (e) {
      const stderr =
        e && typeof e === "object" && e.stderr && Buffer.isBuffer(e.stderr)
          ? e.stderr.toString("utf8")
          : e && typeof e === "object" && typeof e.message === "string"
            ? e.message
            : "mmdc_failed";
      return { ok: false, stderr };
    }
  } finally {
    try {
      fs.rmSync(tmpDir, { recursive: true, force: true });
    } catch {
      // Best-effort cleanup: a leftover temp dir must not mask the result.
    }
  }
}
/**
 * Heal every ```mermaid fence of one Markdown file in place.
 *
 * Each fence body goes through `selfHealMermaid` and is then checked with
 * `validateWithMmdc` at most five times. Between checks, the line named in a
 * "Parse error ... line N" message gets a last-ditch repair that quotes its
 * unquoted `[...]` and `(...)` labels. A fence that still fails is written
 * back prefixed with a `%% SELF-HEAL FAILED AFTER 5 ATTEMPTS` comment.
 *
 * Behavioural fixes over the earlier revision:
 *  - retries stop as soon as a repair changes nothing (each validation starts
 *    a renderer, and re-validating identical text cannot succeed);
 *  - every repaired attempt is validated before the verdict is taken;
 *  - a reported line outside the text is ignored instead of indexing a
 *    negative position;
 *  - the last-ditch repair leaves text inside existing double quotes alone;
 *  - an old failure marker is removed before healing, so markers do not pile
 *    up across runs;
 *  - no blank line is emitted before the closing fence, and the file is only
 *    rewritten when its content actually changed.
 *
 * @param {string} filePath - Path of the Markdown file to process.
 * @returns {void}
 * @throws If the file cannot be read or written.
 */
function healMarkdownFile(filePath) {
  const MAX_ATTEMPTS = 5;
  const FAILURE_MARKER = "%% SELF-HEAL FAILED AFTER 5 ATTEMPTS";

  // Quote bare [label] / (label) occurrences, skipping "already quoted" runs.
  const quoteUnquotedLabels = (line) =>
    line
      .split(/("[^"]*")/) // odd indexes hold the quoted segments
      .map((segment, i) =>
        i % 2 === 1
          ? segment
          : segment
              .replace(/\[([^\]"][^\]]*)\]/g, '["$1"]')
              .replace(/\(([^)"]+)\)/g, '("$1")'),
      )
      .join("");

  // Apply the last-ditch repair to the line a parse error points at.
  const repairReportedLine = (code, stderr) => {
    const lineMatch = stderr.match(/line (\d+)/i);
    if (!lineMatch || !stderr.includes("Parse error")) return code;
    // Heuristic kept from the original: the reported number is assumed to be
    // offset by two from the zero-based index into the text - TODO confirm.
    const index = Number.parseInt(lineMatch[1], 10) - 2;
    const lines = code.split("\n");
    if (index < 0 || index >= lines.length) return code;
    lines[index] = quoteUnquotedLabels(lines[index]);
    return lines.join("\n");
  };

  const original = fs.readFileSync(filePath, "utf8");
  const updated = original.replace(/```mermaid\s*([\s\S]*?)```/g, (_match, rawBlock) => {
    const withoutMarker = String(rawBlock)
      .split("\n")
      .filter((l) => l.trim() !== FAILURE_MARKER)
      .join("\n");

    let attempt = selfHealMermaid(withoutMarker);
    let healed = false;
    for (let i = 0; i < MAX_ATTEMPTS; i++) {
      const verdict = validateWithMmdc(attempt);
      // ok === null: no validator available; still apply the healing output.
      if (verdict.ok === null || verdict.ok === true) {
        healed = true;
        break;
      }
      if (i === MAX_ATTEMPTS - 1) break;
      const repaired = repairReportedLine(attempt, verdict.stderr || "");
      if (repaired === attempt) break; // nothing left to try
      attempt = repaired;
    }

    const body = (healed ? attempt : `${FAILURE_MARKER}\n${attempt}`).replace(/\n+$/, "");
    return "```mermaid\n" + body + "\n```";
  });

  if (updated !== original) fs.writeFileSync(filePath, updated);
}
/**
 * Collect Markdown files (`.md` / `.markdown`, case-insensitive) under a path.
 *
 * A file path yields itself when its extension matches, otherwise nothing.
 * A directory is walked recursively; entries that are neither regular files
 * nor directories are skipped. Anything else yields an empty list.
 *
 * @param {string} startPath - File or directory to inspect.
 * @returns {string[]} Matching paths, in directory-listing order.
 * @throws If `startPath` cannot be stat'ed or a directory cannot be read.
 */
function walkMarkdownFiles(startPath) {
  const isMarkdown = (candidate) => {
    const lowered = candidate.toLowerCase();
    return lowered.endsWith(".md") || lowered.endsWith(".markdown");
  };

  const info = fs.statSync(startPath);
  if (info.isFile()) {
    return isMarkdown(startPath) ? [startPath] : [];
  }
  if (!info.isDirectory()) {
    return [];
  }

  const found = [];
  for (const entry of fs.readdirSync(startPath, { withFileTypes: true })) {
    const full = path.join(startPath, entry.name);
    if (entry.isDirectory()) {
      found.push(...walkMarkdownFiles(full));
    } else if (entry.isFile() && isMarkdown(full)) {
      found.push(full);
    }
  }
  return found;
}
/**
 * CLI entry point: heal every Markdown file reachable from the given targets.
 *
 * With no targets, prints the usage line to stderr and exits with status 2.
 *
 * @param {string[]} argv - Full argument vector; targets start at index 2.
 * @returns {void}
 */
function main(argv) {
  const [, , ...targets] = argv;
  if (targets.length === 0) {
    console.error("Usage: node tools/mermaid/mermaid-self-heal.js <file-or-dir> [...]");
    process.exit(2);
  }

  targets.forEach((target) => {
    const absolute = path.resolve(target);
    walkMarkdownFiles(absolute).forEach((file) => {
      healMarkdownFile(file);
    });
  });
}
// Run the CLI only when this file is executed directly, not when it is
// loaded through require().
if (require.main === module) {
  main(process.argv);
}