diff --git a/src/iftypeset/css_gen.py b/src/iftypeset/css_gen.py index e32c96e..ac2bcb2 100644 --- a/src/iftypeset/css_gen.py +++ b/src/iftypeset/css_gen.py @@ -261,6 +261,12 @@ def generate_profile_css(profile: dict[str, Any]) -> CssOutput: css.append("}") css.append("") + css.append(".if-keep-with-next {") + css.append(" break-inside: avoid;") + css.append(" page-break-inside: avoid;") + css.append("}") + css.append("") + css.append(f"h1 {{ font-size: {h1_size}; }}") css.append(f"h2 {{ font-size: {h2_size}; }}") css.append(f"h3 {{ font-size: {h3_size}; }}") diff --git a/src/iftypeset/rendering.py b/src/iftypeset/rendering.py index e498c06..fbe5e7f 100644 --- a/src/iftypeset/rendering.py +++ b/src/iftypeset/rendering.py @@ -290,8 +290,12 @@ def _render_doc( body_lines: list[str] = [] section_open = False current_section_slug: str | None = None + keep_headings = _headings_keep_enabled(profile) body_lines.append("
") - for block in doc.blocks: + blocks = doc.blocks + i = 0 + while i < len(blocks): + block = blocks[i] if block.type == "heading": level = block.level or 1 slug = _unique_slug(block.text, heading_ids) @@ -301,71 +305,43 @@ def _render_doc( body_lines.append(f"
") section_open = True current_section_slug = slug - body_lines.append( + heading_html = ( f"{_render_inline(block.text, base_path, self_contained, warnings, inline_opts)}" ) + if keep_headings and level <= 3: + keep_indices = _keep_with_next_indices(blocks, i) + if keep_indices: + body_lines.append("
") + body_lines.append(heading_html) + for j in keep_indices: + _render_non_heading_block( + blocks[j], + body_lines=body_lines, + profile=profile, + base_path=base_path, + self_contained=self_contained, + warnings=warnings, + inline_opts=inline_opts, + current_section_slug=current_section_slug, + ) + body_lines.append("
") + i = max(keep_indices) + 1 + continue + body_lines.append(heading_html) + i += 1 continue - if block.type == "paragraph": - img_only = _image_only_paragraph(block.text) - if img_only: - alt, url = img_only - src = _resolve_image_src(url, base_path, self_contained, warnings) - body_lines.append("
") - body_lines.append(f" \"{html.escape(alt)}\"") - body_lines.append("
") - else: - ref_id = _leading_citation_id(block.text) - attr = f" id=\"{html.escape(ref_id)}\"" if ref_id else "" - body_lines.append( - f"{_render_inline(block.text, base_path, self_contained, warnings, inline_opts)}

" - ) - continue - if block.type == "list": - tag = "ol" if block.ordered else "ul" - body_lines.append(f"<{tag}>") - for item in block.items: - groomed = _groom_list_item_text(item, profile, current_section_slug) - rendered = _render_inline(groomed, base_path, self_contained, warnings, inline_opts) - rendered = rendered.replace(_NBSP_TOKEN, " ") - body_lines.append( - f"
  • {rendered}
  • " - ) - body_lines.append(f"") - continue - if block.type == "code": - lang = block.info.strip() - class_attr = f" class=\"language-{html.escape(lang)}\"" if lang else "" - body_lines.append(f"
    {html.escape(block.text)}
    ") - continue - if block.type == "blockquote": - body_lines.append( - f"

    {_render_inline(block.text, base_path, self_contained, warnings, inline_opts)}

    " - ) - continue - if block.type == "table": - numeric_cols, bool_cols = _table_column_classes(block.headers, block.rows) - body_lines.append("") - body_lines.append(" ") - body_lines.append(" ") - for idx, h in enumerate(block.headers): - class_attr = _table_class_attr(idx, numeric_cols, bool_cols) - body_lines.append( - f" {_render_inline(h, base_path, self_contained, warnings, inline_opts)}" - ) - body_lines.append(" ") - body_lines.append(" ") - body_lines.append(" ") - for row in block.rows: - body_lines.append(" ") - for idx, cell in enumerate(row): - class_attr = _table_class_attr(idx, numeric_cols, bool_cols) - body_lines.append( - f" {_render_inline(cell, base_path, self_contained, warnings, inline_opts)}" - ) - body_lines.append(" ") - body_lines.append(" ") - body_lines.append("
    ") - continue + + _render_non_heading_block( + block, + body_lines=body_lines, + profile=profile, + base_path=base_path, + self_contained=self_contained, + warnings=warnings, + inline_opts=inline_opts, + current_section_slug=current_section_slug, + ) + i += 1 if section_open: body_lines.append("
    ") body_lines.append("
    ") @@ -386,6 +362,125 @@ def _render_doc( return "\n".join(html_lines).rstrip() + "\n" +def _headings_keep_enabled(profile: dict[str, Any]) -> bool: + headings = profile.get("headings") or {} + if not isinstance(headings, dict): + return False + keep_lines = int(headings.get("keep_with_next_lines") or 0) + avoid_stranded = bool(headings.get("avoid_stranded_headings") or False) + return keep_lines > 0 or avoid_stranded + + +def _keep_with_next_indices(blocks: list[Any], heading_index: int) -> list[int]: + """ + Best-effort pagination guard: keep a heading with the first meaningful content block that follows. + + This is designed to prevent headings from landing at the bottom of a page while a table/list/code block + is pushed to the next page. It is intentionally conservative and does not attempt precise line-count math. + """ + + first = heading_index + 1 + if first >= len(blocks): + return [] + if getattr(blocks[first], "type", None) == "heading": + return [] + + # If the next block is a short paragraph and the following block is a table/list/code/blockquote, + # include both so the heading moves with the real content block. + indices: list[int] = [] + max_meta_paragraphs = 2 + max_meta_chars = 220 + + j = first + while j < len(blocks) and getattr(blocks[j], "type", None) == "paragraph" and len(indices) < max_meta_paragraphs: + text = getattr(blocks[j], "text", "") or "" + if len(text.strip()) > max_meta_chars: + break + indices.append(j) + j += 1 + + if not indices: + return [first] + + if j < len(blocks) and getattr(blocks[j], "type", None) in {"table", "list", "code", "blockquote"}: + indices.append(j) + return indices + + +def _render_non_heading_block( + block: Any, + *, + body_lines: list[str], + profile: dict[str, Any], + base_path: Path, + self_contained: bool, + warnings: list[str], + inline_opts: InlineOptions, + current_section_slug: str | None, +) -> None: + if block.type == "paragraph": + img_only = _image_only_paragraph(block.text) + if img_only: + alt, url = img_only + src = _resolve_image_src(url, base_path, self_contained, warnings) + body_lines.append("
    ") + body_lines.append(f" \"{html.escape(alt)}\"") + body_lines.append("
    ") + return + ref_id = _leading_citation_id(block.text) + attr = f" id=\"{html.escape(ref_id)}\"" if ref_id else "" + body_lines.append(f"{_render_inline(block.text, base_path, self_contained, warnings, inline_opts)}

    ") + return + + if block.type == "list": + tag = "ol" if block.ordered else "ul" + body_lines.append(f"<{tag}>") + for item in block.items: + groomed = _groom_list_item_text(item, profile, current_section_slug) + rendered = _render_inline(groomed, base_path, self_contained, warnings, inline_opts) + rendered = rendered.replace(_NBSP_TOKEN, " ") + body_lines.append(f"
  • {rendered}
  • ") + body_lines.append(f"") + return + + if block.type == "code": + lang = (block.info or "").strip() + class_attr = f" class=\"language-{html.escape(lang)}\"" if lang else "" + body_lines.append(f"
    {html.escape(block.text)}
    ") + return + + if block.type == "blockquote": + body_lines.append( + f"

    {_render_inline(block.text, base_path, self_contained, warnings, inline_opts)}

    " + ) + return + + if block.type == "table": + numeric_cols, bool_cols = _table_column_classes(block.headers, block.rows) + body_lines.append("") + body_lines.append(" ") + body_lines.append(" ") + for idx, header in enumerate(block.headers): + class_attr = _table_class_attr(idx, numeric_cols, bool_cols) + body_lines.append( + f" {_render_inline(header, base_path, self_contained, warnings, inline_opts)}" + ) + body_lines.append(" ") + body_lines.append(" ") + body_lines.append(" ") + for row in block.rows: + body_lines.append(" ") + for idx, cell in enumerate(row): + class_attr = _table_class_attr(idx, numeric_cols, bool_cols) + body_lines.append( + f" {_render_inline(cell, base_path, self_contained, warnings, inline_opts)}" + ) + body_lines.append(" ") + body_lines.append(" ") + body_lines.append("
    ") + return + + def _doc_title(doc: MdDocument) -> str: for block in doc.blocks: if block.type == "heading" and block.level == 1: diff --git a/tests/test_rendering_inline.py b/tests/test_rendering_inline.py index 122d1d2..d940a4b 100644 --- a/tests/test_rendering_inline.py +++ b/tests/test_rendering_inline.py @@ -2,6 +2,7 @@ import sys import unittest from pathlib import Path from tempfile import TemporaryDirectory +import re ROOT = Path(__file__).resolve().parents[1] SRC = ROOT / "src" @@ -28,6 +29,35 @@ class RenderingInlineTests(unittest.TestCase): self.assertIn("Summary:", html) self.assertIn("Line one
    ", html) + def test_keep_with_next_wraps_heading_and_table(self) -> None: + with TemporaryDirectory() as tmpdir: + md_path = Path(tmpdir) / "table.md" + md_path.write_text( + "## 3) Tableau des chambres\n\n" + "Source: capture instantanée.\n\n" + "| Property | Room type |\n" + "| --- | --- |\n" + "| HO36 | Chambre |\n" + "| Le Flaneur | Dortoir |\n", + encoding="utf-8", + ) + profile = { + "profile_id": "test", + "headings": {"avoid_stranded_headings": True, "keep_with_next_lines": 1}, + } + result = render_html(md_path, profile) + html = result.html + + match = re.search( + r'
    \s*]*>3\) Tableau des chambres.*?.*?
    ', + html, + flags=re.S, + ) + self.assertIsNotNone(match) + table_html = match.group(0) if match else "" + self.assertEqual(table_html.count(""), 3) # header + 2 rows + self.assertEqual(table_html.count(""), 3) + if __name__ == "__main__": unittest.main()