render: keep headings with next blocks

2026-01-08 22:50:34 +00:00 · 2026-01-08 22:50:34 +00:00 · 01d082b038
commit 01d082b038
parent f8808cecd1
3 changed files with 194 additions and 63 deletions
--- a/src/iftypeset/css_gen.py
+++ b/src/iftypeset/css_gen.py
@ -261,6 +261,12 @@ def generate_profile_css(profile: dict[str, Any]) -> CssOutput:
    css.append("}")
    css.append("")

+    css.append(".if-keep-with-next {")
+    css.append("  break-inside: avoid;")
+    css.append("  page-break-inside: avoid;")
+    css.append("}")
+    css.append("")
+
    css.append(f"h1 {{ font-size: {h1_size}; }}")
    css.append(f"h2 {{ font-size: {h2_size}; }}")
    css.append(f"h3 {{ font-size: {h3_size}; }}")
--- a/src/iftypeset/rendering.py
+++ b/src/iftypeset/rendering.py
@ -290,8 +290,12 @@ def _render_doc(
    body_lines: list[str] = []
    section_open = False
    current_section_slug: str | None = None
+    keep_headings = _headings_keep_enabled(profile)
    body_lines.append("<main>")
-    for block in doc.blocks:
+    blocks = doc.blocks
+    i = 0
+    while i < len(blocks):
+        block = blocks[i]
        if block.type == "heading":
            level = block.level or 1
            slug = _unique_slug(block.text, heading_ids)
@ -301,71 +305,43 @@ def _render_doc(
                body_lines.append(f"<section class=\"if-section if-section-{slug}\">")
                section_open = True
                current_section_slug = slug
-            body_lines.append(
+            heading_html = (
                f"<h{level} id=\"{slug}\">{_render_inline(block.text, base_path, self_contained, warnings, inline_opts)}</h{level}>"
            )
+            if keep_headings and level <= 3:
+                keep_indices = _keep_with_next_indices(blocks, i)
+                if keep_indices:
+                    body_lines.append("<div class=\"if-keep-with-next\">")
+                    body_lines.append(heading_html)
+                    for j in keep_indices:
+                        _render_non_heading_block(
+                            blocks[j],
+                            body_lines=body_lines,
+                            profile=profile,
+                            base_path=base_path,
+                            self_contained=self_contained,
+                            warnings=warnings,
+                            inline_opts=inline_opts,
+                            current_section_slug=current_section_slug,
+                        )
+                    body_lines.append("</div>")
+                    i = max(keep_indices) + 1
+                    continue
+            body_lines.append(heading_html)
+            i += 1
            continue
-        if block.type == "paragraph":
-            img_only = _image_only_paragraph(block.text)
-            if img_only:
-                alt, url = img_only
-                src = _resolve_image_src(url, base_path, self_contained, warnings)
-                body_lines.append("<figure class=\"if-figure\">")
-                body_lines.append(f"  <img src=\"{html.escape(src)}\" alt=\"{html.escape(alt)}\">")
-                body_lines.append("</figure>")
-            else:
-                ref_id = _leading_citation_id(block.text)
-                attr = f" id=\"{html.escape(ref_id)}\"" if ref_id else ""
-                body_lines.append(
-                    f"<p{attr}>{_render_inline(block.text, base_path, self_contained, warnings, inline_opts)}</p>"
-                )
-            continue
-        if block.type == "list":
-            tag = "ol" if block.ordered else "ul"
-            body_lines.append(f"<{tag}>")
-            for item in block.items:
-                groomed = _groom_list_item_text(item, profile, current_section_slug)
-                rendered = _render_inline(groomed, base_path, self_contained, warnings, inline_opts)
-                rendered = rendered.replace(_NBSP_TOKEN, "&nbsp;")
-                body_lines.append(
-                    f"  <li>{rendered}</li>"
-                )
-            body_lines.append(f"</{tag}>")
-            continue
-        if block.type == "code":
-            lang = block.info.strip()
-            class_attr = f" class=\"language-{html.escape(lang)}\"" if lang else ""
-            body_lines.append(f"<pre><code{class_attr}>{html.escape(block.text)}</code></pre>")
-            continue
-        if block.type == "blockquote":
-            body_lines.append(
-                f"<blockquote><p>{_render_inline(block.text, base_path, self_contained, warnings, inline_opts)}</p></blockquote>"
-            )
-            continue
-        if block.type == "table":
-            numeric_cols, bool_cols = _table_column_classes(block.headers, block.rows)
-            body_lines.append("<table>")
-            body_lines.append("  <thead>")
-            body_lines.append("    <tr>")
-            for idx, h in enumerate(block.headers):
-                class_attr = _table_class_attr(idx, numeric_cols, bool_cols)
-                body_lines.append(
-                    f"      <th{class_attr}>{_render_inline(h, base_path, self_contained, warnings, inline_opts)}</th>"
-                )
-            body_lines.append("    </tr>")
-            body_lines.append("  </thead>")
-            body_lines.append("  <tbody>")
-            for row in block.rows:
-                body_lines.append("    <tr>")
-                for idx, cell in enumerate(row):
-                    class_attr = _table_class_attr(idx, numeric_cols, bool_cols)
-                    body_lines.append(
-                        f"      <td{class_attr}>{_render_inline(cell, base_path, self_contained, warnings, inline_opts)}</td>"
-                    )
-            body_lines.append("    </tr>")
-            body_lines.append("  </tbody>")
-            body_lines.append("</table>")
-            continue
+
+        _render_non_heading_block(
+            block,
+            body_lines=body_lines,
+            profile=profile,
+            base_path=base_path,
+            self_contained=self_contained,
+            warnings=warnings,
+            inline_opts=inline_opts,
+            current_section_slug=current_section_slug,
+        )
+        i += 1
    if section_open:
        body_lines.append("</section>")
    body_lines.append("</main>")
@ -386,6 +362,125 @@ def _render_doc(
    return "\n".join(html_lines).rstrip() + "\n"


+def _headings_keep_enabled(profile: dict[str, Any]) -> bool:
+    """
+    Report whether the profile asks for headings to be kept with the content that follows.
+
+    The feature is on when ``profile["headings"]`` is a dict and either
+    ``keep_with_next_lines`` converts to a positive integer or
+    ``avoid_stranded_headings`` is truthy. A missing, empty, or non-dict
+    ``headings`` entry disables it.
+    """
+    headings = profile.get("headings") or {}
+    if not isinstance(headings, dict):
+        return False
+    try:
+        keep_lines = int(headings.get("keep_with_next_lines") or 0)
+    except (TypeError, ValueError, OverflowError):
+        # A malformed value (e.g. "abc" or a list) is treated as "not set" so a
+        # pagination hint can never abort the render.
+        keep_lines = 0
+    avoid_stranded = bool(headings.get("avoid_stranded_headings"))
+    return keep_lines > 0 or avoid_stranded
+
+
+def _keep_with_next_indices(blocks: list[Any], heading_index: int) -> list[int]:
+    """
+    Pick the indices of the blocks that should stay on the same page as a heading.
+
+    Returns an empty list when the heading is the last block or is directly
+    followed by another heading. Otherwise:
+
+    * up to two leading paragraphs of at most 220 stripped characters each are
+      collected, and a table/list/code/blockquote directly after them is
+      appended as well;
+    * if the block right after the heading is not such a short paragraph, that
+      single block is returned on its own.
+
+    This is a best-effort guard against stranded headings; it does no
+    line-count arithmetic.
+    """
+
+    def kind_at(pos: int) -> Any:
+        # Blocks are read defensively: an object without ``type`` counts as unknown.
+        return getattr(blocks[pos], "type", None)
+
+    total = len(blocks)
+    start = heading_index + 1
+    if start >= total or kind_at(start) == "heading":
+        return []
+
+    paragraph_limit = 2
+    char_limit = 220
+    trailing_kinds = {"table", "list", "code", "blockquote"}
+
+    # Gather the short "meta" paragraphs that sit between the heading and the real content.
+    kept: list[int] = []
+    cursor = start
+    while cursor < total and kind_at(cursor) == "paragraph" and len(kept) < paragraph_limit:
+        body = getattr(blocks[cursor], "text", "") or ""
+        if len(body.strip()) > char_limit:
+            break
+        kept.append(cursor)
+        cursor += 1
+
+    if not kept:
+        # No short paragraph up front: keep the heading with whatever comes first.
+        return [start]
+
+    # Pull in the content block that the short paragraphs introduce, if there is one.
+    if cursor < total and kind_at(cursor) in trailing_kinds:
+        kept.append(cursor)
+    return kept
+
+
+def _render_non_heading_block(
+    block: Any,
+    *,
+    body_lines: list[str],
+    profile: dict[str, Any],
+    base_path: Path,
+    self_contained: bool,
+    warnings: list[str],
+    inline_opts: InlineOptions,
+    current_section_slug: str | None,
+) -> None:
+    # Append the HTML for one non-heading block to ``body_lines`` (mutated in place; returns None).
+    # Handled block types: paragraph, list, code, blockquote, table.
+    # Any other ``block.type`` falls through every branch and appends nothing.
+    # ``profile`` and ``current_section_slug`` are only used for list-item grooming;
+    # the remaining keyword arguments are forwarded to the inline/image helpers.
+    if block.type == "paragraph":
+        # A paragraph that consists solely of an image is emitted as a <figure> instead of a <p>.
+        img_only = _image_only_paragraph(block.text)
+        if img_only:
+            alt, url = img_only
+            src = _resolve_image_src(url, base_path, self_contained, warnings)
+            body_lines.append("<figure class=\"if-figure\">")
+            body_lines.append(f"  <img src=\"{html.escape(src)}\" alt=\"{html.escape(alt)}\">")
+            body_lines.append("</figure>")
+            return
+        # When _leading_citation_id yields an id, it becomes the <p> element's id attribute.
+        ref_id = _leading_citation_id(block.text)
+        attr = f" id=\"{html.escape(ref_id)}\"" if ref_id else ""
+        body_lines.append(f"<p{attr}>{_render_inline(block.text, base_path, self_contained, warnings, inline_opts)}</p>")
+        return
+
+    if block.type == "list":
+        tag = "ol" if block.ordered else "ul"
+        body_lines.append(f"<{tag}>")
+        for item in block.items:
+            groomed = _groom_list_item_text(item, profile, current_section_slug)
+            rendered = _render_inline(groomed, base_path, self_contained, warnings, inline_opts)
+            # The placeholder token is swapped for the entity only after inline rendering.
+            rendered = rendered.replace(_NBSP_TOKEN, "&nbsp;")
+            body_lines.append(f"  <li>{rendered}</li>")
+        body_lines.append(f"</{tag}>")
+        return
+
+    if block.type == "code":
+        # ``or ""`` tolerates a code block whose info string is None.
+        lang = (block.info or "").strip()
+        class_attr = f" class=\"language-{html.escape(lang)}\"" if lang else ""
+        # Code text is HTML-escaped verbatim; it is not passed through _render_inline.
+        body_lines.append(f"<pre><code{class_attr}>{html.escape(block.text)}</code></pre>")
+        return
+
+    if block.type == "blockquote":
+        body_lines.append(
+            f"<blockquote><p>{_render_inline(block.text, base_path, self_contained, warnings, inline_opts)}</p></blockquote>"
+        )
+        return
+
+    if block.type == "table":
+        # Column classes are computed once and applied to both header and body cells by index.
+        numeric_cols, bool_cols = _table_column_classes(block.headers, block.rows)
+        body_lines.append("<table>")
+        body_lines.append("  <thead>")
+        body_lines.append("    <tr>")
+        for idx, header in enumerate(block.headers):
+            class_attr = _table_class_attr(idx, numeric_cols, bool_cols)
+            body_lines.append(
+                f"      <th{class_attr}>{_render_inline(header, base_path, self_contained, warnings, inline_opts)}</th>"
+            )
+        body_lines.append("    </tr>")
+        body_lines.append("  </thead>")
+        body_lines.append("  <tbody>")
+        for row in block.rows:
+            body_lines.append("    <tr>")
+            for idx, cell in enumerate(row):
+                class_attr = _table_class_attr(idx, numeric_cols, bool_cols)
+                body_lines.append(
+                    f"      <td{class_attr}>{_render_inline(cell, base_path, self_contained, warnings, inline_opts)}</td>"
+                )
+            # The closing tag sits inside the row loop so every <tr> opened above is closed.
+            body_lines.append("    </tr>")
+        body_lines.append("  </tbody>")
+        body_lines.append("</table>")
+        return
+
+
 def _doc_title(doc: MdDocument) -> str:
    for block in doc.blocks:
        if block.type == "heading" and block.level == 1:
--- a/tests/test_rendering_inline.py
+++ b/tests/test_rendering_inline.py
@ -2,6 +2,7 @@ import sys
 import unittest
 from pathlib import Path
 from tempfile import TemporaryDirectory
+import re

 ROOT = Path(__file__).resolve().parents[1]
 SRC = ROOT / "src"
@ -28,6 +29,35 @@ class RenderingInlineTests(unittest.TestCase):
        self.assertIn("<strong>Summary:</strong>", html)
        self.assertIn("Line one<br>", html)

+    def test_keep_with_next_wraps_heading_and_table(self) -> None:
+        # Fixture: an h2, one short paragraph, then a two-row table. With keep-with-next
+        # enabled in the profile, all three are expected inside one wrapper div.
+        with TemporaryDirectory() as tmpdir:
+            md_path = Path(tmpdir) / "table.md"
+            md_path.write_text(
+                "## 3) Tableau des chambres\n\n"
+                "Source: capture instantanée.\n\n"
+                "| Property | Room type |\n"
+                "| --- | --- |\n"
+                "| HO36 | Chambre |\n"
+                "| Le Flaneur | Dortoir |\n",
+                encoding="utf-8",
+            )
+            profile = {
+                "profile_id": "test",
+                "headings": {"avoid_stranded_headings": True, "keep_with_next_lines": 1},
+            }
+            result = render_html(md_path, profile)
+            html = result.html
+
+        # The wrapper div must open immediately before the heading, and the table must
+        # follow it; re.S lets ``.*?`` span the newlines between rendered lines.
+        match = re.search(
+            r'<div class="if-keep-with-next">\s*<h2[^>]*>3\) Tableau des chambres</h2>.*?<table>.*?</table>',
+            html,
+            flags=re.S,
+        )
+        self.assertIsNotNone(match)
+        # The ``else ""`` fallback only keeps the expression well-typed; the assertion above
+        # has already failed the test when there is no match.
+        table_html = match.group(0) if match else ""
+        # Opening and closing row tags must balance: one header row plus two body rows.
+        self.assertEqual(table_html.count("<tr>"), 3)  # header + 2 rows
+        self.assertEqual(table_html.count("</tr>"), 3)
+

 if __name__ == "__main__":
    unittest.main()