This commit is contained in:
parent
f8808cecd1
commit
01d082b038
3 changed files with 194 additions and 63 deletions
|
|
@ -261,6 +261,12 @@ def generate_profile_css(profile: dict[str, Any]) -> CssOutput:
|
|||
css.append("}")
|
||||
css.append("")
|
||||
|
||||
css.append(".if-keep-with-next {")
|
||||
css.append(" break-inside: avoid;")
|
||||
css.append(" page-break-inside: avoid;")
|
||||
css.append("}")
|
||||
css.append("")
|
||||
|
||||
css.append(f"h1 {{ font-size: {h1_size}; }}")
|
||||
css.append(f"h2 {{ font-size: {h2_size}; }}")
|
||||
css.append(f"h3 {{ font-size: {h3_size}; }}")
|
||||
|
|
|
|||
|
|
@ -290,8 +290,12 @@ def _render_doc(
|
|||
body_lines: list[str] = []
|
||||
section_open = False
|
||||
current_section_slug: str | None = None
|
||||
keep_headings = _headings_keep_enabled(profile)
|
||||
body_lines.append("<main>")
|
||||
for block in doc.blocks:
|
||||
blocks = doc.blocks
|
||||
i = 0
|
||||
while i < len(blocks):
|
||||
block = blocks[i]
|
||||
if block.type == "heading":
|
||||
level = block.level or 1
|
||||
slug = _unique_slug(block.text, heading_ids)
|
||||
|
|
@ -301,71 +305,43 @@ def _render_doc(
|
|||
body_lines.append(f"<section class=\"if-section if-section-{slug}\">")
|
||||
section_open = True
|
||||
current_section_slug = slug
|
||||
body_lines.append(
|
||||
heading_html = (
|
||||
f"<h{level} id=\"{slug}\">{_render_inline(block.text, base_path, self_contained, warnings, inline_opts)}</h{level}>"
|
||||
)
|
||||
if keep_headings and level <= 3:
|
||||
keep_indices = _keep_with_next_indices(blocks, i)
|
||||
if keep_indices:
|
||||
body_lines.append("<div class=\"if-keep-with-next\">")
|
||||
body_lines.append(heading_html)
|
||||
for j in keep_indices:
|
||||
_render_non_heading_block(
|
||||
blocks[j],
|
||||
body_lines=body_lines,
|
||||
profile=profile,
|
||||
base_path=base_path,
|
||||
self_contained=self_contained,
|
||||
warnings=warnings,
|
||||
inline_opts=inline_opts,
|
||||
current_section_slug=current_section_slug,
|
||||
)
|
||||
body_lines.append("</div>")
|
||||
i = max(keep_indices) + 1
|
||||
continue
|
||||
body_lines.append(heading_html)
|
||||
i += 1
|
||||
continue
|
||||
if block.type == "paragraph":
|
||||
img_only = _image_only_paragraph(block.text)
|
||||
if img_only:
|
||||
alt, url = img_only
|
||||
src = _resolve_image_src(url, base_path, self_contained, warnings)
|
||||
body_lines.append("<figure class=\"if-figure\">")
|
||||
body_lines.append(f" <img src=\"{html.escape(src)}\" alt=\"{html.escape(alt)}\">")
|
||||
body_lines.append("</figure>")
|
||||
else:
|
||||
ref_id = _leading_citation_id(block.text)
|
||||
attr = f" id=\"{html.escape(ref_id)}\"" if ref_id else ""
|
||||
body_lines.append(
|
||||
f"<p{attr}>{_render_inline(block.text, base_path, self_contained, warnings, inline_opts)}</p>"
|
||||
)
|
||||
continue
|
||||
if block.type == "list":
|
||||
tag = "ol" if block.ordered else "ul"
|
||||
body_lines.append(f"<{tag}>")
|
||||
for item in block.items:
|
||||
groomed = _groom_list_item_text(item, profile, current_section_slug)
|
||||
rendered = _render_inline(groomed, base_path, self_contained, warnings, inline_opts)
|
||||
rendered = rendered.replace(_NBSP_TOKEN, " ")
|
||||
body_lines.append(
|
||||
f" <li>{rendered}</li>"
|
||||
)
|
||||
body_lines.append(f"</{tag}>")
|
||||
continue
|
||||
if block.type == "code":
|
||||
lang = block.info.strip()
|
||||
class_attr = f" class=\"language-{html.escape(lang)}\"" if lang else ""
|
||||
body_lines.append(f"<pre><code{class_attr}>{html.escape(block.text)}</code></pre>")
|
||||
continue
|
||||
if block.type == "blockquote":
|
||||
body_lines.append(
|
||||
f"<blockquote><p>{_render_inline(block.text, base_path, self_contained, warnings, inline_opts)}</p></blockquote>"
|
||||
)
|
||||
continue
|
||||
if block.type == "table":
|
||||
numeric_cols, bool_cols = _table_column_classes(block.headers, block.rows)
|
||||
body_lines.append("<table>")
|
||||
body_lines.append(" <thead>")
|
||||
body_lines.append(" <tr>")
|
||||
for idx, h in enumerate(block.headers):
|
||||
class_attr = _table_class_attr(idx, numeric_cols, bool_cols)
|
||||
body_lines.append(
|
||||
f" <th{class_attr}>{_render_inline(h, base_path, self_contained, warnings, inline_opts)}</th>"
|
||||
)
|
||||
body_lines.append(" </tr>")
|
||||
body_lines.append(" </thead>")
|
||||
body_lines.append(" <tbody>")
|
||||
for row in block.rows:
|
||||
body_lines.append(" <tr>")
|
||||
for idx, cell in enumerate(row):
|
||||
class_attr = _table_class_attr(idx, numeric_cols, bool_cols)
|
||||
body_lines.append(
|
||||
f" <td{class_attr}>{_render_inline(cell, base_path, self_contained, warnings, inline_opts)}</td>"
|
||||
)
|
||||
body_lines.append(" </tr>")
|
||||
body_lines.append(" </tbody>")
|
||||
body_lines.append("</table>")
|
||||
continue
|
||||
|
||||
_render_non_heading_block(
|
||||
block,
|
||||
body_lines=body_lines,
|
||||
profile=profile,
|
||||
base_path=base_path,
|
||||
self_contained=self_contained,
|
||||
warnings=warnings,
|
||||
inline_opts=inline_opts,
|
||||
current_section_slug=current_section_slug,
|
||||
)
|
||||
i += 1
|
||||
if section_open:
|
||||
body_lines.append("</section>")
|
||||
body_lines.append("</main>")
|
||||
|
|
@ -386,6 +362,125 @@ def _render_doc(
|
|||
return "\n".join(html_lines).rstrip() + "\n"
|
||||
|
||||
|
||||
def _headings_keep_enabled(profile: dict[str, Any]) -> bool:
|
||||
headings = profile.get("headings") or {}
|
||||
if not isinstance(headings, dict):
|
||||
return False
|
||||
keep_lines = int(headings.get("keep_with_next_lines") or 0)
|
||||
avoid_stranded = bool(headings.get("avoid_stranded_headings") or False)
|
||||
return keep_lines > 0 or avoid_stranded
|
||||
|
||||
|
||||
def _keep_with_next_indices(blocks: list[Any], heading_index: int) -> list[int]:
|
||||
"""
|
||||
Best-effort pagination guard: keep a heading with the first meaningful content block that follows.
|
||||
|
||||
This is designed to prevent headings from landing at the bottom of a page while a table/list/code block
|
||||
is pushed to the next page. It is intentionally conservative and does not attempt precise line-count math.
|
||||
"""
|
||||
|
||||
first = heading_index + 1
|
||||
if first >= len(blocks):
|
||||
return []
|
||||
if getattr(blocks[first], "type", None) == "heading":
|
||||
return []
|
||||
|
||||
# If the next block is a short paragraph and the following block is a table/list/code/blockquote,
|
||||
# include both so the heading moves with the real content block.
|
||||
indices: list[int] = []
|
||||
max_meta_paragraphs = 2
|
||||
max_meta_chars = 220
|
||||
|
||||
j = first
|
||||
while j < len(blocks) and getattr(blocks[j], "type", None) == "paragraph" and len(indices) < max_meta_paragraphs:
|
||||
text = getattr(blocks[j], "text", "") or ""
|
||||
if len(text.strip()) > max_meta_chars:
|
||||
break
|
||||
indices.append(j)
|
||||
j += 1
|
||||
|
||||
if not indices:
|
||||
return [first]
|
||||
|
||||
if j < len(blocks) and getattr(blocks[j], "type", None) in {"table", "list", "code", "blockquote"}:
|
||||
indices.append(j)
|
||||
return indices
|
||||
|
||||
|
||||
def _render_non_heading_block(
    block: Any,
    *,
    body_lines: list[str],
    profile: dict[str, Any],
    base_path: Path,
    self_contained: bool,
    warnings: list[str],
    inline_opts: InlineOptions,
    current_section_slug: str | None,
) -> None:
    """Append the HTML for one non-heading block to ``body_lines``.

    Handles the block types ``paragraph`` (including image-only paragraphs,
    emitted as a ``<figure>``), ``list``, ``code``, ``blockquote`` and
    ``table``. A block of any other type appends nothing. The function returns
    ``None``; its output is the lines appended to ``body_lines``.
    """
    emit = body_lines.append
    kind = block.type

    def inline(text: str) -> str:
        # Every inline fragment in this block is rendered with the same context.
        return _render_inline(text, base_path, self_contained, warnings, inline_opts)

    if kind == "paragraph":
        image = _image_only_paragraph(block.text)
        if image:
            alt_text, image_url = image
            resolved = _resolve_image_src(image_url, base_path, self_contained, warnings)
            emit('<figure class="if-figure">')
            emit(f' <img src="{html.escape(resolved)}" alt="{html.escape(alt_text)}">')
            emit("</figure>")
        else:
            # A leading citation id, when present, becomes the paragraph's anchor.
            citation = _leading_citation_id(block.text)
            id_attr = f' id="{html.escape(citation)}"' if citation else ""
            emit(f"<p{id_attr}>{inline(block.text)}</p>")

    elif kind == "list":
        list_tag = "ol" if block.ordered else "ul"
        emit(f"<{list_tag}>")
        for entry in block.items:
            groomed = _groom_list_item_text(entry, profile, current_section_slug)
            # NOTE(review): the replacement literal is reproduced as it appears in
            # this view; confirm whether a non-breaking space is intended here.
            markup = inline(groomed).replace(_NBSP_TOKEN, " ")
            emit(f" <li>{markup}</li>")
        emit(f"</{list_tag}>")

    elif kind == "code":
        language = (block.info or "").strip()
        lang_class = f' class="language-{html.escape(language)}"' if language else ""
        emit(f"<pre><code{lang_class}>{html.escape(block.text)}</code></pre>")

    elif kind == "blockquote":
        emit(f"<blockquote><p>{inline(block.text)}</p></blockquote>")

    elif kind == "table":
        numeric_cols, bool_cols = _table_column_classes(block.headers, block.rows)
        body_lines.extend(("<table>", " <thead>", " <tr>"))
        for col, heading in enumerate(block.headers):
            cell_class = _table_class_attr(col, numeric_cols, bool_cols)
            emit(f" <th{cell_class}>{inline(heading)}</th>")
        body_lines.extend((" </tr>", " </thead>", " <tbody>"))
        for cells in block.rows:
            emit(" <tr>")
            for col, value in enumerate(cells):
                cell_class = _table_class_attr(col, numeric_cols, bool_cols)
                emit(f" <td{cell_class}>{inline(value)}</td>")
            emit(" </tr>")
        body_lines.extend((" </tbody>", "</table>"))
|
||||
|
||||
|
||||
def _doc_title(doc: MdDocument) -> str:
|
||||
for block in doc.blocks:
|
||||
if block.type == "heading" and block.level == 1:
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ import sys
|
|||
import unittest
|
||||
from pathlib import Path
|
||||
from tempfile import TemporaryDirectory
|
||||
import re
|
||||
|
||||
ROOT = Path(__file__).resolve().parents[1]
|
||||
SRC = ROOT / "src"
|
||||
|
|
@ -28,6 +29,35 @@ class RenderingInlineTests(unittest.TestCase):
|
|||
self.assertIn("<strong>Summary:</strong>", html)
|
||||
self.assertIn("Line one<br>", html)
|
||||
|
||||
def test_keep_with_next_wraps_heading_and_table(self) -> None:
    """The heading and the table after it are emitted inside one keep-with-next wrapper.

    Also checks that the wrapped table still has all three rows
    (one header row plus two body rows).
    """
    markdown = (
        "## 3) Tableau des chambres\n\n"
        "Source: capture instantanée.\n\n"
        "| Property | Room type |\n"
        "| --- | --- |\n"
        "| HO36 | Chambre |\n"
        "| Le Flaneur | Dortoir |\n"
    )
    profile = {
        "profile_id": "test",
        "headings": {"avoid_stranded_headings": True, "keep_with_next_lines": 1},
    }
    # Wrapper <div>, then the <h2>, then (lazily) everything up to the end of the table.
    wrapper_pattern = re.compile(
        r'<div class="if-keep-with-next">\s*<h2[^>]*>3\) Tableau des chambres</h2>.*?<table>.*?</table>',
        re.S,
    )

    with TemporaryDirectory() as tmpdir:
        md_path = Path(tmpdir) / "table.md"
        md_path.write_text(markdown, encoding="utf-8")
        rendered = render_html(md_path, profile).html

    found = wrapper_pattern.search(rendered)
    self.assertIsNotNone(found)
    wrapped = found.group(0) if found else ""
    self.assertEqual(wrapped.count("<tr>"), 3)  # header + 2 rows
    self.assertEqual(wrapped.count("</tr>"), 3)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue