diff --git a/src/iftypeset/css_gen.py b/src/iftypeset/css_gen.py
index e32c96e..ac2bcb2 100644
--- a/src/iftypeset/css_gen.py
+++ b/src/iftypeset/css_gen.py
@@ -261,6 +261,12 @@ def generate_profile_css(profile: dict[str, Any]) -> CssOutput:
css.append("}")
css.append("")
+ css.append(".if-keep-with-next {")
+ css.append(" break-inside: avoid;")
+ css.append(" page-break-inside: avoid;")
+ css.append("}")
+ css.append("")
+
css.append(f"h1 {{ font-size: {h1_size}; }}")
css.append(f"h2 {{ font-size: {h2_size}; }}")
css.append(f"h3 {{ font-size: {h3_size}; }}")
diff --git a/src/iftypeset/rendering.py b/src/iftypeset/rendering.py
index e498c06..fbe5e7f 100644
--- a/src/iftypeset/rendering.py
+++ b/src/iftypeset/rendering.py
@@ -290,8 +290,12 @@ def _render_doc(
body_lines: list[str] = []
section_open = False
current_section_slug: str | None = None
+ keep_headings = _headings_keep_enabled(profile)
body_lines.append("")
- for block in doc.blocks:
+ blocks = doc.blocks
+ i = 0
+ while i < len(blocks):
+ block = blocks[i]
if block.type == "heading":
level = block.level or 1
slug = _unique_slug(block.text, heading_ids)
@@ -301,71 +305,43 @@ def _render_doc(
body_lines.append(f"")
section_open = True
current_section_slug = slug
- body_lines.append(
+ heading_html = (
f"{_render_inline(block.text, base_path, self_contained, warnings, inline_opts)}"
)
+ if keep_headings and level <= 3:
+ keep_indices = _keep_with_next_indices(blocks, i)
+ if keep_indices:
+ body_lines.append("")
+ body_lines.append(heading_html)
+ for j in keep_indices:
+ _render_non_heading_block(
+ blocks[j],
+ body_lines=body_lines,
+ profile=profile,
+ base_path=base_path,
+ self_contained=self_contained,
+ warnings=warnings,
+ inline_opts=inline_opts,
+ current_section_slug=current_section_slug,
+ )
+ body_lines.append("
")
+ i = max(keep_indices) + 1
+ continue
+ body_lines.append(heading_html)
+ i += 1
continue
- if block.type == "paragraph":
- img_only = _image_only_paragraph(block.text)
- if img_only:
- alt, url = img_only
- src = _resolve_image_src(url, base_path, self_contained, warnings)
- body_lines.append("")
- body_lines.append(f"
")
- body_lines.append("")
- else:
- ref_id = _leading_citation_id(block.text)
- attr = f" id=\"{html.escape(ref_id)}\"" if ref_id else ""
- body_lines.append(
- f"{_render_inline(block.text, base_path, self_contained, warnings, inline_opts)}
"
- )
- continue
- if block.type == "list":
- tag = "ol" if block.ordered else "ul"
- body_lines.append(f"<{tag}>")
- for item in block.items:
- groomed = _groom_list_item_text(item, profile, current_section_slug)
- rendered = _render_inline(groomed, base_path, self_contained, warnings, inline_opts)
- rendered = rendered.replace(_NBSP_TOKEN, " ")
- body_lines.append(
- f" {rendered}"
- )
- body_lines.append(f"{tag}>")
- continue
- if block.type == "code":
- lang = block.info.strip()
- class_attr = f" class=\"language-{html.escape(lang)}\"" if lang else ""
- body_lines.append(f"{html.escape(block.text)}
")
- continue
- if block.type == "blockquote":
- body_lines.append(
- f"{_render_inline(block.text, base_path, self_contained, warnings, inline_opts)}
"
- )
- continue
- if block.type == "table":
- numeric_cols, bool_cols = _table_column_classes(block.headers, block.rows)
- body_lines.append("")
- body_lines.append(" ")
- body_lines.append(" ")
- for idx, h in enumerate(block.headers):
- class_attr = _table_class_attr(idx, numeric_cols, bool_cols)
- body_lines.append(
- f" | {_render_inline(h, base_path, self_contained, warnings, inline_opts)} | "
- )
- body_lines.append("
")
- body_lines.append(" ")
- body_lines.append(" ")
- for row in block.rows:
- body_lines.append(" ")
- for idx, cell in enumerate(row):
- class_attr = _table_class_attr(idx, numeric_cols, bool_cols)
- body_lines.append(
- f" | {_render_inline(cell, base_path, self_contained, warnings, inline_opts)} | "
- )
- body_lines.append("
")
- body_lines.append(" ")
- body_lines.append("
")
- continue
+
+ _render_non_heading_block(
+ block,
+ body_lines=body_lines,
+ profile=profile,
+ base_path=base_path,
+ self_contained=self_contained,
+ warnings=warnings,
+ inline_opts=inline_opts,
+ current_section_slug=current_section_slug,
+ )
+ i += 1
if section_open:
body_lines.append("")
body_lines.append("")
@@ -386,6 +362,125 @@ def _render_doc(
return "\n".join(html_lines).rstrip() + "\n"
+def _headings_keep_enabled(profile: dict[str, Any]) -> bool:
+    """Report whether the profile asks for headings to stay with the content after them.
+
+    The feature is on when ``profile["headings"]`` is a mapping with a truthy
+    ``avoid_stranded_headings`` flag or a positive ``keep_with_next_lines`` count.
+
+    Args:
+        profile: Typeset profile mapping. The ``headings`` entry may be missing,
+            ``None``, or not a mapping; in all of those cases the feature is off.
+
+    Returns:
+        ``True`` when keep-with-next wrapping should be applied, otherwise ``False``.
+    """
+    headings = profile.get("headings") or {}
+    if not isinstance(headings, dict):
+        return False
+    # Check the boolean switch first so a malformed line count cannot mask it.
+    if headings.get("avoid_stranded_headings"):
+        return True
+    try:
+        keep_lines = int(headings.get("keep_with_next_lines") or 0)
+    except (TypeError, ValueError, OverflowError):
+        # A malformed count (e.g. "auto", a list, infinity) only disables this
+        # optional pagination hint; it should not abort the whole render.
+        return False
+    return keep_lines > 0
+
+
+def _keep_with_next_indices(blocks: list[Any], heading_index: int) -> list[int]:
+    """
+    Pick the block indices that should be glued to the heading at ``heading_index``.
+
+    Best-effort pagination guard, not line-count math:
+
+    * nothing follows the heading, or the next block is another heading -> ``[]``;
+    * the next block is not a short paragraph -> just that one block;
+    * otherwise up to two leading short paragraphs (at most 220 stripped characters
+      each), plus the table/list/code/blockquote that directly follows them, if any.
+
+    The returned indices are consecutive and ascending.
+    """
+
+    paragraph_cap = 2
+    char_cap = 220
+    total = len(blocks)
+
+    def kind_at(pos: int) -> Any:
+        return getattr(blocks[pos], "type", None)
+
+    start = heading_index + 1
+    if start >= total or kind_at(start) == "heading":
+        return []
+
+    # Gather the run of short "meta" paragraphs that sits directly under the heading.
+    kept: list[int] = []
+    cursor = start
+    while cursor < total and len(kept) < paragraph_cap:
+        if kind_at(cursor) != "paragraph":
+            break
+        body = getattr(blocks[cursor], "text", "") or ""
+        if len(body.strip()) > char_cap:
+            break
+        kept.append(cursor)
+        cursor += 1
+
+    if not kept:
+        # No short lead-in paragraph: keep the heading with whatever comes first.
+        return [start]
+
+    # Pull in the real content block the short paragraphs were introducing.
+    if cursor < total and kind_at(cursor) in {"table", "list", "code", "blockquote"}:
+        kept.append(cursor)
+    return kept
+
+
+def _render_non_heading_block(
+ block: Any,
+ *,
+ body_lines: list[str],
+ profile: dict[str, Any],
+ base_path: Path,
+ self_contained: bool,
+ warnings: list[str],
+ inline_opts: InlineOptions,
+ current_section_slug: str | None,
+) -> None:
+ # Render one non-heading block by appending its output lines to `body_lines` in place.
+ # Dispatches on `block.type`: "paragraph", "list", "code", "blockquote", "table".
+ # Any other type matches no branch, so nothing is appended and the function returns None.
+ # NOTE(review): the markup inside several string literals below appears to have been
+ # stripped from this view (some literals are split across lines); confirm the exact
+ # strings against the real file before editing them.
+ if block.type == "paragraph":
+ # A paragraph holding only an image is emitted as three separate lines;
+ # `_image_only_paragraph` yields an (alt, url) pair in that case.
+ img_only = _image_only_paragraph(block.text)
+ if img_only:
+ alt, url = img_only
+ src = _resolve_image_src(url, base_path, self_contained, warnings)
+ body_lines.append("")
+ body_lines.append(f"
")
+ body_lines.append("")
+ return
+ # Ordinary paragraph: when `_leading_citation_id` returns an id, `attr` becomes an
+ # escaped ` id="..."` attribute string; otherwise it is empty.
+ ref_id = _leading_citation_id(block.text)
+ attr = f" id=\"{html.escape(ref_id)}\"" if ref_id else ""
+ body_lines.append(f"
{_render_inline(block.text, base_path, self_contained, warnings, inline_opts)}
")
+ return
+
+ # Lists: the tag is "ol" or "ul" depending on `block.ordered`.
+ if block.type == "list":
+ tag = "ol" if block.ordered else "ul"
+ body_lines.append(f"<{tag}>")
+ for item in block.items:
+ # Each item is groomed with the profile and current section slug, rendered inline,
+ # and then has every `_NBSP_TOKEN` placeholder replaced.
+ groomed = _groom_list_item_text(item, profile, current_section_slug)
+ rendered = _render_inline(groomed, base_path, self_contained, warnings, inline_opts)
+ rendered = rendered.replace(_NBSP_TOKEN, " ")
+ body_lines.append(f" {rendered}")
+ body_lines.append(f"{tag}>")
+ return
+
+ # Code blocks: the text is HTML-escaped, not inline-rendered.
+ if block.type == "code":
+ # `or ""` guards against `block.info` being None before `.strip()`.
+ lang = (block.info or "").strip()
+ class_attr = f" class=\"language-{html.escape(lang)}\"" if lang else ""
+ body_lines.append(f"{html.escape(block.text)}
")
+ return
+
+ # Blockquotes: the text goes through the inline renderer.
+ if block.type == "blockquote":
+ body_lines.append(
+ f"{_render_inline(block.text, base_path, self_contained, warnings, inline_opts)}
"
+ )
+ return
+
+ # Tables: header cells first, then one group of lines per row.
+ if block.type == "table":
+ # `_table_column_classes` returns two values that `_table_class_attr` turns into a
+ # per-column class attribute, looked up by column index.
+ numeric_cols, bool_cols = _table_column_classes(block.headers, block.rows)
+ body_lines.append("")
+ body_lines.append(" ")
+ body_lines.append(" ")
+ for idx, header in enumerate(block.headers):
+ class_attr = _table_class_attr(idx, numeric_cols, bool_cols)
+ body_lines.append(
+ f" | {_render_inline(header, base_path, self_contained, warnings, inline_opts)} | "
+ )
+ body_lines.append("
")
+ body_lines.append(" ")
+ body_lines.append(" ")
+ for row in block.rows:
+ body_lines.append(" ")
+ for idx, cell in enumerate(row):
+ class_attr = _table_class_attr(idx, numeric_cols, bool_cols)
+ body_lines.append(
+ f" | {_render_inline(cell, base_path, self_contained, warnings, inline_opts)} | "
+ )
+ body_lines.append("
")
+ body_lines.append(" ")
+ body_lines.append("
")
+ return
+
+
def _doc_title(doc: MdDocument) -> str:
for block in doc.blocks:
if block.type == "heading" and block.level == 1:
diff --git a/tests/test_rendering_inline.py b/tests/test_rendering_inline.py
index 122d1d2..d940a4b 100644
--- a/tests/test_rendering_inline.py
+++ b/tests/test_rendering_inline.py
@@ -2,6 +2,7 @@ import sys
import unittest
from pathlib import Path
from tempfile import TemporaryDirectory
+import re
ROOT = Path(__file__).resolve().parents[1]
SRC = ROOT / "src"
@@ -28,6 +29,35 @@ class RenderingInlineTests(unittest.TestCase):
self.assertIn("Summary:", html)
self.assertIn("Line one
", html)
+ def test_keep_with_next_wraps_heading_and_table(self) -> None:
+ # Renders a level-2 heading followed by a short "Source:" paragraph and a two-row
+ # table, then checks that the rendered HTML has one span matching the pattern below
+ # and that two substrings each occur exactly three times inside that span.
+ with TemporaryDirectory() as tmpdir:
+ md_path = Path(tmpdir) / "table.md"
+ md_path.write_text(
+ "## 3) Tableau des chambres\n\n"
+ "Source: capture instantanée.\n\n"
+ "| Property | Room type |\n"
+ "| --- | --- |\n"
+ "| HO36 | Chambre |\n"
+ "| Le Flaneur | Dortoir |\n",
+ encoding="utf-8",
+ )
+ # Both heading switches are set in this profile.
+ profile = {
+ "profile_id": "test",
+ "headings": {"avoid_stranded_headings": True, "keep_with_next_lines": 1},
+ }
+ result = render_html(md_path, profile)
+ html = result.html
+
+ # re.S lets `.*?` span newlines, so the match can cover several output lines.
+ # NOTE(review): the markup in this pattern and in the count() literals below appears
+ # to have been stripped from this view; confirm the exact strings in the real file.
+ match = re.search(
+ r'\s*
]*>3\) Tableau des chambres
.*?
',
+ html,
+ flags=re.S,
+ )
+ self.assertIsNotNone(match)
+ # The conditional fallback keeps the lines below from raising if `match` is None.
+ table_html = match.group(0) if match else ""
+ self.assertEqual(table_html.count("
"), 3) # header + 2 rows
+ self.assertEqual(table_html.count("
"), 3)
+
if __name__ == "__main__":
unittest.main()