Add server-side Markdown→PDF export (v0.1)
Some checks failed
pdfexport / pdfexport-worker-fixtures (push) Has been cancelled

This commit is contained in:
codex 2025-12-16 17:52:53 +00:00
parent 5ec1f7f363
commit 1ce1370983
31 changed files with 4698 additions and 1 deletions

View file

@ -0,0 +1,20 @@
# CI workflow for the PDF export worker: build the container image, then run
# the Markdown fixture suite inside it with networking disabled to prove the
# worker never fetches external resources at render time.
name: pdfexport
on:
  push:
  pull_request:
jobs:
  pdfexport-worker-fixtures:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Build worker image
        run: docker build -t forgejo/pdf-worker:v0.1 ./worker/pdf
      - name: Run fixtures (no network)
        run: |
          # --network=none: fixtures must render without any external fetches.
          # The fixtures directory is mounted read-only into the container.
          docker run --rm --network=none \
            -v "$PWD/tests/fixtures/pdfexport:/fixtures:ro" \
            forgejo/pdf-worker:v0.1 \
            node scripts/test-fixtures.js --fixtures /fixtures

51
modules/setting/pdf.go Normal file
View file

@ -0,0 +1,51 @@
// SPDX-License-Identifier: MIT
package setting
import "strings"
// PDF holds server-side configuration for Markdown → PDF export.
//
// NOTE: The worker-facing config surface is represented in services/pdfexport and
// is always marshaled with the exact JSON keys required by the worker contract.
// PDF is the package-level [pdf] section configuration, populated by
// loadPDFFrom. String options are lowercased after mapping so downstream
// validation can compare exact values; defaults below match the v0.1
// worker contract defaults used by services/pdfexport.
var PDF = struct {
	// Enabled is the master switch; export requests are rejected when false.
	Enabled bool
	// Determinism selects reproducibility mode ("strict" or "relaxed";
	// validated in services/pdfexport).
	Determinism string
	// Timestamp selects the PDF timestamp source ("commit_time" or "render_time").
	Timestamp string
	// Typography selects the stylesheet ("basic" or "professional").
	Typography string
	OrphansWidowsEnforce bool `ini:"ORPHANS_WIDOWS_ENFORCE"`
	FooterEnabled        bool `ini:"FOOTER_ENABLED"`
	// Mermaid holds the nested [pdf.mermaid] options.
	Mermaid struct {
		Strategy string
		Caption  bool
	}
	// ContainerRuntime is the executable used to run the worker container.
	// Typical values: docker, podman.
	ContainerRuntime string `ini:"CONTAINER_RUNTIME"`
	// WorkerImage is the container image reference to run.
	WorkerImage string `ini:"WORKER_IMAGE"`
}{
	Enabled:              false,
	Determinism:          "strict",
	Timestamp:            "commit_time",
	Typography:           "professional",
	OrphansWidowsEnforce: true,
	FooterEnabled:        true,
	ContainerRuntime:     "podman",
	WorkerImage:          "localhost/forgejo/pdf-worker:v0.1",
	Mermaid:              struct{ Strategy string; Caption bool }{Strategy: "balanced", Caption: false},
}
// loadPDFFrom maps the [pdf] and nested [pdf.mermaid] ini sections onto the
// package-level PDF struct, then lowercases the free-form string options so
// later validation (services/pdfexport) can compare against exact values.
func loadPDFFrom(rootCfg ConfigProvider) {
	mustMapSetting(rootCfg, "pdf", &PDF)
	// Allow nested mermaid configuration under [pdf.mermaid].
	mustMapSetting(rootCfg, "pdf.mermaid", &PDF.Mermaid)
	PDF.Determinism = strings.ToLower(PDF.Determinism)
	PDF.Timestamp = strings.ToLower(PDF.Timestamp)
	PDF.Typography = strings.ToLower(PDF.Typography)
	PDF.Mermaid.Strategy = strings.ToLower(PDF.Mermaid.Strategy)
}

View file

@ -155,6 +155,7 @@ func loadCommonSettingsFrom(cfg ConfigProvider) error {
loadGitFrom(cfg)
loadMirrorFrom(cfg)
loadMarkupFrom(cfg)
loadPDFFrom(cfg)
loadQuotaFrom(cfg)
loadOtherFrom(cfg)
return nil

View file

@ -1315,6 +1315,7 @@ n_release_few = %s releases
released_this = released this
file.title = %s at %s
file_raw = Raw
file_export_pdf = Export PDF
file_follow = Follow symlink
file_history = History
file_view_source = View source
@ -3925,4 +3926,3 @@ filepreview.truncated = Preview has been truncated
[translation_meta]
test = This is a test string. It is not displayed in Forgejo UI but is used for testing purposes. Feel free to enter "ok" to save time (or a fun fact of your choice) to hit that sweet 100% completion mark :)

View file

@ -6,6 +6,7 @@ package repo
import (
"path"
"strings"
"time"
git_model "code.gitea.io/gitea/models/git"
@ -17,6 +18,7 @@ import (
"code.gitea.io/gitea/modules/storage"
"code.gitea.io/gitea/routers/common"
"code.gitea.io/gitea/services/context"
"code.gitea.io/gitea/services/pdfexport"
)
// ServeBlobOrLFS download a git.Blob redirecting to LFS if necessary
@ -115,6 +117,11 @@ func getBlobForEntry(ctx *context.Context) (blob *git.Blob, lastModified *time.T
// SingleDownload download a file by repos path
func SingleDownload(ctx *context.Context) {
if strings.EqualFold(ctx.FormString("format"), "pdf") {
pdfexport.ServeMarkdownPDF(ctx)
return
}
blob, lastModified := getBlobForEntry(ctx)
if blob == nil {
return

View file

@ -393,6 +393,10 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry) {
ctx.Data["FileIsSymlink"] = entry.IsLink()
ctx.Data["FileName"] = blob.Name()
ctx.Data["RawFileLink"] = ctx.Repo.RepoLink + "/raw/" + ctx.Repo.BranchNameSubURL() + "/" + util.PathEscapeSegments(ctx.Repo.TreePath)
ctx.Data["PDFExportLink"] = ""
if setting.PDF.Enabled && markup.IsMarkupFile(blob.Name(), "markdown") {
ctx.Data["PDFExportLink"] = ctx.Repo.RepoLink + "/raw/" + util.PathEscapeSegments(ctx.Repo.TreePath) + "?ref=" + url.QueryEscape(ctx.Repo.CommitID) + "&format=pdf"
}
if entry.IsLink() {
_, link, err := entry.FollowLinks()

View file

@ -0,0 +1,65 @@
// SPDX-License-Identifier: MIT
package pdfexport
import (
"encoding/json"
"strings"
"code.gitea.io/gitea/modules/setting"
)
// configFromSetting assembles the worker-facing pdfConfig from the server's
// [pdf] settings: values are lowercased, empty fields receive the v0.1
// defaults, and every field is checked against the worker contract's allowed
// value set. A non-empty userError means the server configuration is invalid
// and the request must be rejected.
func configFromSetting() (pdfConfig, userError) {
	var cfg pdfConfig
	cfg.Determinism = strings.ToLower(setting.PDF.Determinism)
	cfg.Timestamp = strings.ToLower(setting.PDF.Timestamp)
	cfg.Typography = strings.ToLower(setting.PDF.Typography)
	cfg.Mermaid.Strategy = strings.ToLower(setting.PDF.Mermaid.Strategy)
	cfg.Mermaid.Caption = setting.PDF.Mermaid.Caption
	cfg.OrphansWidows.Enforce = setting.PDF.OrphansWidowsEnforce
	cfg.Footer.Enabled = setting.PDF.FooterEnabled

	// Fill in the v0.1 defaults for options left empty/misconfigured.
	defaults := []struct {
		field *string
		value string
	}{
		{&cfg.Determinism, "strict"},
		{&cfg.Timestamp, "commit_time"},
		{&cfg.Typography, "professional"},
		{&cfg.Mermaid.Strategy, "balanced"},
	}
	for _, d := range defaults {
		if *d.field == "" {
			*d.field = d.value
		}
	}

	invalid := func() (pdfConfig, userError) {
		return pdfConfig{}, errBadRequest("ERR_PDF_CONFIG_INVALID", "Invalid PDF configuration.")
	}
	switch cfg.Determinism {
	case "strict", "relaxed":
	default:
		return invalid()
	}
	switch cfg.Timestamp {
	case "commit_time", "render_time":
	default:
		return invalid()
	}
	switch cfg.Typography {
	case "basic", "professional":
	default:
		return invalid()
	}
	switch cfg.Mermaid.Strategy {
	case "fast", "balanced", "prestige":
	default:
		return invalid()
	}
	// Strict determinism pins the timestamp to the commit time.
	if cfg.Determinism == "strict" && cfg.Timestamp != "commit_time" {
		return invalid()
	}
	return cfg, userError{}
}
// configHash returns the SHA-256 hex digest of the JSON encoding of cfg.
// It is folded into the PDF cache key so any configuration change
// invalidates previously cached renders.
func configHash(cfg pdfConfig) (string, error) {
	b, err := json.Marshal(cfg)
	if err != nil {
		return "", err
	}
	return sha256Hex(b), nil
}

View file

@ -0,0 +1,28 @@
// SPDX-License-Identifier: MIT
package pdfexport
import "fmt"
// userError is an error that is safe to surface to API clients: it carries
// an HTTP status code, a stable machine-readable identifier, and a
// human-readable message. The zero value (empty ErrorID) means "no error".
type userError struct {
	Status  int
	ErrorID string
	Message string
}

// Error implements the error interface, rendering "<ErrorID>: <Message>".
func (e userError) Error() string {
	return fmt.Sprintf("%s: %s", e.ErrorID, e.Message)
}

// newUserError is the shared constructor behind the status-specific helpers.
func newUserError(status int, errorID, message string) userError {
	return userError{Status: status, ErrorID: errorID, Message: message}
}

// errBadRequest builds a 400 Bad Request userError.
func errBadRequest(errorID, message string) userError {
	return newUserError(400, errorID, message)
}

// errNotFound builds a 404 Not Found userError.
func errNotFound(errorID, message string) userError {
	return newUserError(404, errorID, message)
}

// errInternal builds a 500 Internal Server Error userError.
func errInternal(errorID, message string) userError {
	return newUserError(500, errorID, message)
}

View file

@ -0,0 +1,148 @@
// SPDX-License-Identifier: MIT
package pdfexport
import (
"bytes"
gocontext "context"
"encoding/json"
"errors"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"strings"
"time"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
)
const (
	// defaultWorkerTimeout bounds a single container run end-to-end.
	defaultWorkerTimeout = 60 * time.Second
	// maxWorkerStderrBytes caps how much worker stderr is buffered for
	// error reporting (256 KiB); the rest is discarded.
	maxWorkerStderrBytes = 256 * 1024
)
// runWorker writes input as JSON into a fresh job directory, executes the
// PDF worker container against it, and returns the rendered PDF bytes.
// A non-empty userError describes the failure in client-safe terms.
//
// Sandbox: the container runs with no network, a read-only root filesystem,
// all capabilities dropped, no-new-privileges, a bounded tmpfs, and memory/
// CPU limits; the whole run is bounded by defaultWorkerTimeout.
func runWorker(input workerInput) ([]byte, userError) {
	if !setting.PDF.Enabled {
		return nil, errBadRequest("ERR_PDF_DISABLED", "PDF export is disabled.")
	}
	// Per-job scratch directory, mounted into the container as /job.
	jobDir, err := os.MkdirTemp("", "forgejo-pdfexport-job-*")
	if err != nil {
		return nil, errInternal("ERR_PDF_INTERNAL", "Failed to export PDF.")
	}
	defer func() {
		if err := os.RemoveAll(jobDir); err != nil {
			log.Error("pdfexport: remove job dir: %v", err)
		}
	}()
	inPath := filepath.Join(jobDir, "input.json")
	outPath := filepath.Join(jobDir, "output.pdf")
	inBytes, err := json.Marshal(input)
	if err != nil {
		return nil, errInternal("ERR_PDF_INTERNAL", "Failed to export PDF.")
	}
	// 0600: the input contains repository content; keep it private to us.
	if err := os.WriteFile(inPath, inBytes, 0o600); err != nil {
		return nil, errInternal("ERR_PDF_INTERNAL", "Failed to export PDF.")
	}
	// Fall back to the shipped defaults when settings are unset.
	runtime := setting.PDF.ContainerRuntime
	if runtime == "" {
		runtime = "podman"
	}
	image := setting.PDF.WorkerImage
	if image == "" {
		image = "localhost/forgejo/pdf-worker:v0.1"
	}
	ctx, cancel := gocontext.WithTimeout(gocontext.Background(), defaultWorkerTimeout)
	defer cancel()
	args := []string{
		"run", "--rm",
		"--network=none",
		"--read-only",
		"--cap-drop=ALL",
		"--security-opt=no-new-privileges",
		"--tmpfs", "/tmp:rw,noexec,nosuid,size=1024m",
		"--volume", fmt.Sprintf("%s:/job:rw", jobDir),
		"--memory", "2g",
		"--cpus", "2",
		image,
		"node", "src/index.js",
		"--in", "/job/input.json",
		"--out", "/job/output.pdf",
	}
	if strings.Contains(runtime, "podman") {
		// Proxmox/LXC deployments often confine AppArmor and can block the default container profile load.
		// Insert at index 6 so the flag sits with the other --security-opt
		// flags. Safe despite the nested append: the inner []string literal
		// allocates a fresh backing array, copying args[6:] before the outer
		// append overwrites that region.
		args = append(args[:6], append([]string{"--security-opt=apparmor=unconfined"}, args[6:]...)...)
	}
	cmd := exec.CommandContext(ctx, runtime, args...)
	cmd.Stdout = io.Discard
	var stderr bytes.Buffer
	// Cap captured stderr so a chatty worker cannot exhaust server memory.
	cmd.Stderr = &limitedWriter{W: &stderr, N: maxWorkerStderrBytes}
	if err := cmd.Run(); err != nil {
		if errors.Is(ctx.Err(), gocontext.DeadlineExceeded) {
			return nil, errInternal("ERR_PDF_TIMEOUT", "PDF export timed out.")
		}
		// Best-effort parse of worker error envelope from stderr JSONL.
		if ue, ok := parseWorkerError(stderr.String()); ok {
			return nil, ue
		}
		log.Error("pdfexport: worker failed: %v", err)
		return nil, errInternal("ERR_PDF_WORKER_FAILED", "Failed to export PDF.")
	}
	// An empty output file is treated the same as a missing one.
	pdf, err := os.ReadFile(outPath)
	if err != nil || len(pdf) == 0 {
		return nil, errInternal("ERR_PDF_WORKER_NO_OUTPUT", "Failed to export PDF.")
	}
	return pdf, userError{}
}
type limitedWriter struct {
W io.Writer
N int
}
func (w *limitedWriter) Write(p []byte) (int, error) {
if w.N <= 0 {
return len(p), nil
}
if len(p) > w.N {
p = p[:w.N]
}
n, err := w.W.Write(p)
w.N -= n
return n, err
}
// parseWorkerError scans worker stderr (JSON lines) from the last line
// backwards looking for an error envelope of the form
// {"error_id": "...", "message": "..."}. The boolean reports whether a
// usable envelope was found; the returned error always carries status 500.
func parseWorkerError(stderr string) (userError, bool) {
	type envelope struct {
		ErrorID string `json:"error_id"`
		Message string `json:"message"`
	}
	lines := strings.Split(stderr, "\n")
	for i := len(lines) - 1; i >= 0; i-- {
		candidate := strings.TrimSpace(lines[i])
		if candidate == "" {
			continue
		}
		var env envelope
		if json.Unmarshal([]byte(candidate), &env) != nil {
			// Not JSON (or not an object); keep scanning earlier lines.
			continue
		}
		if env.ErrorID != "" && env.Message != "" {
			return errInternal(env.ErrorID, env.Message), true
		}
	}
	return userError{}, false
}

View file

@ -0,0 +1,235 @@
// SPDX-License-Identifier: MIT
package pdfexport
import (
"crypto/sha256"
"encoding/hex"
"encoding/json"
"fmt"
"io"
"net/http"
"os"
"path"
"path/filepath"
"strings"
"time"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/markup"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/services/context"
)
const (
	// maxMarkdownBytes caps the source file size accepted for export (4 MiB).
	maxMarkdownBytes = 4 << 20
	// ManifestSHA must be kept in sync with worker/pdf/manifest.json for cache key stability.
	// It is used as part of the worker input and cache key (v0.1 determinism requirement).
	ManifestSHA = "e0e9ebe129de8fabcf2aab0a9509f248cd907d3684542e8d42e7e45b747cc956"
)
// ServeMarkdownPDF handles GET /{owner}/{repo}/raw/{path}?ref=<sha>&format=pdf.
// On success it streams the rendered PDF inline; on failure it writes the
// safe JSON error envelope ({"error_id", "message"}) via writeUserError.
func ServeMarkdownPDF(ctx *context.Context) {
	// Defensive re-check; the caller is expected to dispatch only format=pdf here.
	if strings.ToLower(ctx.FormString("format")) != "pdf" {
		ctx.Status(http.StatusNotFound)
		return
	}
	// Determine effective file path and commit.
	effectivePath, commit, ue := resolvePathAndCommit(ctx)
	if ue.ErrorID != "" {
		writeUserError(ctx, ue)
		return
	}
	// Only Markdown files are exportable.
	if !markup.IsMarkupFile(path.Base(effectivePath), "markdown") {
		writeUserError(ctx, errBadRequest("ERR_PDF_NOT_MARKDOWN", "PDF export is only supported for Markdown files."))
		return
	}
	entry, err := commit.GetTreeEntryByPath(effectivePath)
	if err != nil {
		if git.IsErrNotExist(err) {
			writeUserError(ctx, errNotFound("ERR_PDF_NOT_FOUND", "File not found."))
			return
		}
		log.Error("pdfexport: GetTreeEntryByPath: %v", err)
		writeUserError(ctx, errInternal("ERR_PDF_INTERNAL", "Failed to export PDF."))
		return
	}
	// Directories and submodules have no renderable content.
	if entry.IsDir() || entry.IsSubModule() {
		writeUserError(ctx, errBadRequest("ERR_PDF_NOT_MARKDOWN", "PDF export is only supported for Markdown files."))
		return
	}
	blob := entry.Blob()
	rc, err := blob.DataAsync()
	if err != nil {
		log.Error("pdfexport: blob.DataAsync: %v", err)
		writeUserError(ctx, errInternal("ERR_PDF_INTERNAL", "Failed to export PDF."))
		return
	}
	defer func() { _ = rc.Close() }()
	// Bounded read: reject files larger than maxMarkdownBytes.
	md, ue := readBounded(rc, maxMarkdownBytes)
	if ue.ErrorID != "" {
		writeUserError(ctx, ue)
		return
	}
	cfg, ue := configFromSetting()
	if ue.ErrorID != "" {
		writeUserError(ctx, ue)
		return
	}
	cfgHash, err := configHash(cfg)
	if err != nil {
		log.Error("pdfexport: config hash: %v", err)
		writeUserError(ctx, errInternal("ERR_PDF_INTERNAL", "Failed to export PDF."))
		return
	}
	// Cache key covers everything that affects the output: repo, commit,
	// path, effective config, and the pinned worker manifest.
	cacheKey := fmt.Sprintf("%d|%s|%s|%s|%s", ctx.Repo.Repository.ID, commit.ID.String(), effectivePath, cfgHash, ManifestSHA)
	pdf, cacheHit, ue := getOrGeneratePDF(cacheKey, workerInput{
		Markdown: string(md),
		RepoMeta: workerRepoMeta{
			Owner:             ctx.Repo.Repository.OwnerName,
			Repo:              ctx.Repo.Repository.Name,
			Path:              effectivePath,
			RepoID:            ctx.Repo.Repository.ID,
			CommitSHA:         commit.ID.String(),
			CommitTimeRFC3339: commit.Committer.When.UTC().Format(time.RFC3339),
		},
		Config:      workerConfigWrap{PDF: cfg},
		ManifestSHA: ManifestSHA,
	})
	if ue.ErrorID != "" {
		writeUserError(ctx, ue)
		return
	}
	// Derive the download name: strip a trailing ".md" (case-insensitive),
	// fall back to "document" for degenerate names.
	filename := path.Base(effectivePath)
	if strings.HasSuffix(strings.ToLower(filename), ".md") {
		filename = filename[:len(filename)-3]
	}
	if filename == "" {
		filename = "document"
	}
	filename += ".pdf"
	ctx.Resp.Header().Set("Content-Type", "application/pdf")
	ctx.Resp.Header().Set("X-Content-Type-Options", "nosniff")
	ctx.Resp.Header().Set("Content-Disposition", fmt.Sprintf("inline; filename=%q", filename))
	if cacheHit {
		ctx.Resp.Header().Set("X-Forgejo-PDF-Cache", "HIT")
	} else {
		ctx.Resp.Header().Set("X-Forgejo-PDF-Cache", "MISS")
	}
	ctx.Resp.WriteHeader(http.StatusOK)
	_, _ = ctx.Resp.Write(pdf)
}
// resolvePathAndCommit determines which file and commit an export request
// refers to. An explicit ?ref=<sha> is resolved directly against the git
// repository; otherwise the commit already resolved by the repo middleware
// is used. For the legacy /raw/* route with an explicit ref, the route
// wildcard is taken verbatim as the file path.
func resolvePathAndCommit(ctx *context.Context) (string, *git.Commit, userError) {
	if ctx.Repo.Repository == nil || ctx.Repo.GitRepo == nil {
		return "", nil, errInternal("ERR_PDF_INTERNAL", "Failed to export PDF.")
	}
	ref := strings.TrimSpace(ctx.FormString("ref"))
	commit := ctx.Repo.Commit
	if ref != "" {
		c, err := ctx.Repo.GitRepo.GetCommit(ref)
		if err != nil {
			if git.IsErrNotExist(err) {
				return "", nil, errNotFound("ERR_PDF_REF_NOT_FOUND", "Commit not found.")
			}
			log.Error("pdfexport: GetCommit(ref): %v", err)
			return "", nil, errInternal("ERR_PDF_INTERNAL", "Failed to export PDF.")
		}
		commit = c
	}
	if commit == nil {
		return "", nil, errInternal("ERR_PDF_INTERNAL", "Failed to export PDF.")
	}
	// For the legacy /raw/* route with an explicit ref query parameter, treat the wildcard as the filepath
	// (avoid legacy ref guessing for deterministic cache keys).
	effectivePath := ctx.Repo.TreePath
	if ref != "" && isRawLegacyPath(ctx) {
		effectivePath = strings.TrimPrefix(ctx.Params("*"), "/")
	}
	if effectivePath == "" {
		return "", nil, errBadRequest("ERR_PDF_BAD_REQUEST", "Invalid request.")
	}
	return effectivePath, commit, userError{}
}
// isRawLegacyPath reports whether the request URL uses the legacy raw form
// /{owner}/{repo}/raw/<filepath> rather than the typed form that embeds a
// branch/tag/commit/blob segment after "/raw/".
func isRawLegacyPath(ctx *context.Context) bool {
	_, after, found := strings.Cut(ctx.Req.URL.Path, "/raw/")
	if !found {
		return false
	}
	// A typed segment right after /raw/ means this is NOT the legacy form.
	for _, typed := range []string{"branch/", "tag/", "commit/", "blob/"} {
		if strings.HasPrefix(after, typed) {
			return false
		}
	}
	return true
}
// readBounded reads at most limit bytes from r, failing with
// ERR_PDF_TOO_LARGE when the stream holds more than limit bytes.
func readBounded(r io.Reader, limit int64) ([]byte, userError) {
	// Read one extra byte so oversized input is detectable without
	// draining the whole stream.
	data, err := io.ReadAll(io.LimitReader(r, limit+1))
	if err != nil {
		return nil, errInternal("ERR_PDF_INTERNAL", "Failed to export PDF.")
	}
	if int64(len(data)) > limit {
		return nil, errBadRequest("ERR_PDF_TOO_LARGE", "File is too large to export.")
	}
	return data, userError{}
}
// getOrGeneratePDF returns the cached PDF for cacheKey when present,
// otherwise runs the worker and caches the result. The bool reports a cache
// hit. Cache files live under APP_DATA_PATH/pdfexport/cache, named by the
// SHA-256 of cacheKey.
func getOrGeneratePDF(cacheKey string, input workerInput) ([]byte, bool, userError) {
	cacheDir := filepath.Join(setting.AppDataPath, "pdfexport", "cache")
	if err := os.MkdirAll(cacheDir, 0o750); err != nil {
		log.Error("pdfexport: mkdir cache: %v", err)
		return nil, false, errInternal("ERR_PDF_INTERNAL", "Failed to export PDF.")
	}
	cacheFile := filepath.Join(cacheDir, sha256Hex([]byte(cacheKey))+".pdf")
	// Empty cache files are ignored (treated as a miss).
	if b, err := os.ReadFile(cacheFile); err == nil && len(b) > 0 {
		return b, true, userError{}
	}
	pdf, ue := runWorker(input)
	if ue.ErrorID != "" {
		return nil, false, ue
	}
	// Write-then-rename so concurrent readers never observe a partial file.
	tmp := cacheFile + ".tmp"
	if err := os.WriteFile(tmp, pdf, 0o640); err == nil {
		_ = os.Rename(tmp, cacheFile)
	} else {
		// Cache write failure is non-fatal; still serve the generated PDF.
		log.Error("pdfexport: write cache: %v", err)
	}
	return pdf, false, userError{}
}
// sha256Hex returns the lowercase hex encoding of the SHA-256 digest of b.
func sha256Hex(b []byte) string {
	hasher := sha256.New()
	hasher.Write(b)
	return hex.EncodeToString(hasher.Sum(nil))
}
// writeUserError renders ue as the JSON error envelope used by the PDF
// export endpoints: {"error_id": ..., "message": ...}. A zero Status is
// treated as 500 Internal Server Error.
func writeUserError(ctx *context.Context, ue userError) {
	status := ue.Status
	if status == 0 {
		status = http.StatusInternalServerError
	}
	ctx.Resp.Header().Set("Content-Type", "application/json; charset=utf-8")
	ctx.Resp.Header().Set("X-Content-Type-Options", "nosniff")
	ctx.Resp.WriteHeader(status)
	// Encode errors are unrecoverable once the header is written; ignore deliberately.
	_ = json.NewEncoder(ctx.Resp).Encode(map[string]string{
		"error_id": ue.ErrorID,
		"message":  ue.Message,
	})
}

View file

@ -0,0 +1,45 @@
// SPDX-License-Identifier: MIT
package pdfexport
// workerInput is the full JSON document written to /job/input.json for the
// worker container. JSON key names must match the worker contract exactly.
type workerInput struct {
	Markdown    string           `json:"markdown"`
	RepoMeta    workerRepoMeta   `json:"repoMeta"`
	Config      workerConfigWrap `json:"config"`
	ManifestSHA string           `json:"manifestSHA"`
}

// workerRepoMeta identifies the source file and commit being exported.
type workerRepoMeta struct {
	Owner string `json:"owner"`
	Repo  string `json:"repo"`
	Path  string `json:"path"`
	// NOTE(review): RepoID is interface{} although the only Go caller in
	// this package passes an int64 — presumably kept loose to match the JS
	// fixture harness; consider tightening to int64. TODO confirm.
	RepoID            interface{} `json:"repoID"`
	CommitSHA         string      `json:"commitSHA"`
	CommitTimeRFC3339 string      `json:"commitTimeRFC3339"`
}

// workerConfigWrap nests pdfConfig under the "pdf" key as the worker expects.
type workerConfigWrap struct {
	PDF pdfConfig `json:"pdf"`
}

// pdfConfig matches the required worker config surface (JSON keys and values).
type pdfConfig struct {
	Determinism string `json:"determinism"` // strict|relaxed
	Timestamp   string `json:"timestamp"`   // commit_time|render_time
	Typography  string `json:"typography"`  // basic|professional
	Mermaid struct {
		Strategy string `json:"strategy"` // fast|balanced|prestige
		Caption  bool   `json:"caption"`
	} `json:"mermaid"`
	OrphansWidows struct {
		Enforce bool `json:"enforce"`
	} `json:"orphansWidows"`
	Footer struct {
		Enabled bool `json:"enabled"`
	} `json:"footer"`
}

View file

@ -47,6 +47,9 @@
<a class="ui mini basic button" href="{{$.SymlinkURL}}" data-kind="follow-symlink">{{ctx.Locale.Tr "repo.file_follow"}}</a>
{{end}}
<a class="ui mini basic button" href="{{$.RawFileLink}}">{{ctx.Locale.Tr "repo.file_raw"}}</a>
{{if and $.PDFExportLink .IsMarkup (eq .MarkupType "markdown") .IsDisplayingRendered}}
<a class="ui mini basic button" href="{{$.PDFExportLink}}">{{ctx.Locale.Tr "repo.file_export_pdf"}}</a>
{{end}}
{{if not .IsViewCommit}}
<a class="ui mini basic button" href="{{.RepoLink}}/src/commit/{{PathEscape .CommitID}}/{{PathEscapeSegments .TreePath}}">{{ctx.Locale.Tr "repo.file_permalink"}}</a>
{{end}}

17
tests/fixtures/pdfexport/basic.md vendored Normal file
View file

@ -0,0 +1,17 @@
# PDF Export Fixture — Basic
KNOWN_UNIQUE_STRING: PDFX_BASIC_6d7d6fdd
This fixture ensures the PDF contains selectable text, including headings, paragraphs, lists and code.
## List
- Alpha
- Beta
- Gamma
## Code
```bash
echo "hello"
```

View file

@ -0,0 +1,11 @@
# PDF Export Fixture — External Resources Blocked
KNOWN_UNIQUE_STRING: PDFX_EXT_25d1c31b
This fixture contains external resources which must not be fetched in v0.1.
![external-image](https://example.com/forbidden.png)
[external-link](https://example.com/)
<script>alert("raw html disabled")</script>

12
tests/fixtures/pdfexport/mermaid.md vendored Normal file
View file

@ -0,0 +1,12 @@
# PDF Export Fixture — Mermaid
KNOWN_UNIQUE_STRING: PDFX_MERMAID_2a2dbf1d
```mermaid
flowchart LR
A[Alpha] --> B[Beta]
B --> C[Gamma]
C --> D[Delta]
```
The diagram labels must remain selectable PDF text.

View file

@ -0,0 +1,16 @@
# PDF Export Fixture — Mermaid Wide
KNOWN_UNIQUE_STRING: PDFX_WIDE_bf3a6c2e
```mermaid
flowchart LR
A[Start] --> B[Step 1]
B --> C[Step 2]
C --> D[Step 3]
D --> E[Step 4]
E --> F[Step 5]
F --> G[Step 6]
G --> H[Step 7]
H --> I[Step 8]
I --> J[Finish]
```

49
worker/pdf/Dockerfile Normal file
View file

@ -0,0 +1,49 @@
# PDF worker image: Node 20 plus the shared-library closure needed to run
# Puppeteer's bundled Chromium headless, with local-only assets.
FROM node:20-bookworm-slim
# TZ=UTC pins the timezone for deterministic rendering; Puppeteer's browser
# download is cached at a fixed path inside the image.
ENV NODE_ENV=production \
    TZ=UTC \
    PUPPETEER_CACHE_DIR=/opt/puppeteer \
    PUPPETEER_SKIP_DOWNLOAD=0
# dumb-init: proper PID-1 signal handling. qpdf/poppler-utils: PDF
# post-processing/inspection tools. The lib* packages are Chromium's
# runtime dependencies.
RUN apt-get update -y && apt-get install -y --no-install-recommends \
    ca-certificates \
    dumb-init \
    qpdf \
    poppler-utils \
    libasound2 \
    libatk-bridge2.0-0 \
    libatk1.0-0 \
    libcups2 \
    libdrm2 \
    libgbm1 \
    libgtk-3-0 \
    libnss3 \
    libpango-1.0-0 \
    libpangocairo-1.0-0 \
    libx11-6 \
    libx11-xcb1 \
    libxcb1 \
    libxcomposite1 \
    libxdamage1 \
    libxext6 \
    libxfixes3 \
    libxrandr2 \
    libxrender1 \
    libxshmfence1 \
    libxkbcommon0 \
    && rm -rf /var/lib/apt/lists/*
WORKDIR /opt/forgejo-pdf
# Install exact locked dependencies before copying sources (layer caching).
COPY package.json package-lock.json ./
RUN npm ci --omit=dev
COPY src ./src
COPY scripts ./scripts
COPY assets ./assets
# Stage local render assets, then record their hashes in manifest.json.
RUN node scripts/copy-assets.js \
    && node scripts/generate-manifest.js
ENTRYPOINT ["dumb-init", "--"]
CMD ["node", "src/index.js"]

View file

@ -0,0 +1,66 @@
/* Print stylesheet for the "basic" typography mode. All fonts are bundled
   assets referenced by relative URLs so rendering needs no network access. */
@font-face {
  font-family: "IBM Plex Sans";
  font-style: normal;
  font-weight: 400;
  src: url("../fonts/ibm-plex-sans-latin-400-normal.woff2") format("woff2");
  font-display: swap;
}
@font-face {
  font-family: "IBM Plex Mono";
  font-style: normal;
  font-weight: 400;
  src: url("../fonts/ibm-plex-mono-latin-400-normal.woff2") format("woff2");
  font-display: swap;
}
/* A4 portrait with uniform 20mm margins. */
@page {
  size: A4;
  margin: 20mm;
}
html, body {
  font-family: "IBM Plex Sans", system-ui, -apple-system, "Segoe UI", Roboto, Arial, sans-serif;
  color: #111827;
  background: #ffffff;
}
/* Break long tokens (URLs, identifiers) instead of overflowing the page. */
p, li, a, h1, h2, h3, h4, h5, h6 {
  overflow-wrap: anywhere;
  word-break: break-word;
}
/* Each top-level heading starts a fresh page. */
h1 { break-before: page; }
/* Orphan/widow control for body text. */
p { orphans: 3; widows: 3; }
pre, table, figure, blockquote { break-inside: avoid; }
pre, code {
  font-family: "IBM Plex Mono", ui-monospace, SFMono-Regular, Menlo, Monaco, "Liberation Mono", monospace;
}
/* Code blocks wrap rather than clip at the page edge. */
pre {
  background: #f3f4f6;
  padding: 10px 12px;
  border-radius: 6px;
  overflow-wrap: anywhere;
  white-space: pre-wrap;
}
.mermaid-figure {
  margin: 12px 0;
}
.mermaid-figure svg {
  max-width: 100%;
  height: auto;
}
/* Wide diagrams are promoted onto their own landscape page. */
.mermaid-landscape {
  break-before: page;
  page: mermaidLandscape;
}
@page mermaidLandscape {
  size: A4 landscape;
  margin: 20mm;
}

View file

@ -0,0 +1,98 @@
/* Print stylesheet for the "professional" typography mode: richer type
   scale, 600-weight headings, and styled blockquotes/rules. Fonts are
   bundled assets — no network access at render time. */
@font-face {
  font-family: "IBM Plex Sans";
  font-style: normal;
  font-weight: 400;
  src: url("../fonts/ibm-plex-sans-latin-400-normal.woff2") format("woff2");
  font-display: swap;
}
@font-face {
  font-family: "IBM Plex Sans";
  font-style: normal;
  font-weight: 600;
  src: url("../fonts/ibm-plex-sans-latin-600-normal.woff2") format("woff2");
  font-display: swap;
}
@font-face {
  font-family: "IBM Plex Mono";
  font-style: normal;
  font-weight: 400;
  src: url("../fonts/ibm-plex-mono-latin-400-normal.woff2") format("woff2");
  font-display: swap;
}
/* A4 portrait; extra bottom margin leaves room for a page footer. */
@page {
  size: A4;
  margin: 18mm 18mm 22mm 18mm;
}
html, body {
  font-family: "IBM Plex Sans", system-ui, -apple-system, "Segoe UI", Roboto, Arial, sans-serif;
  color: #0f172a;
  background: #ffffff;
  font-size: 11.5pt;
  line-height: 1.45;
}
/* Break long tokens (URLs, identifiers) instead of overflowing the page. */
p, li, a, h1, h2, h3, h4, h5, h6 {
  overflow-wrap: anywhere;
  word-break: break-word;
}
h1, h2, h3, h4, h5, h6 {
  font-weight: 600;
  color: #0b1220;
}
/* Each top-level heading starts a fresh page. */
h1 { break-before: page; font-size: 20pt; margin: 0 0 10pt; }
h2 { font-size: 15pt; margin: 18pt 0 8pt; }
h3 { font-size: 12.5pt; margin: 14pt 0 6pt; }
/* Orphan/widow control for body text. */
p { orphans: 3; widows: 3; margin: 0 0 10pt; }
pre, table, figure, blockquote { break-inside: avoid; }
pre, code {
  font-family: "IBM Plex Mono", ui-monospace, SFMono-Regular, Menlo, Monaco, "Liberation Mono", monospace;
}
/* Code blocks wrap rather than clip at the page edge. */
pre {
  background: #f1f5f9;
  border: 1px solid #e2e8f0;
  padding: 10px 12px;
  border-radius: 8px;
  overflow-wrap: anywhere;
  white-space: pre-wrap;
}
blockquote {
  margin: 12pt 0;
  padding: 0 0 0 10pt;
  border-left: 3px solid #cbd5e1;
  color: #334155;
}
hr {
  border: 0;
  border-top: 1px solid #e2e8f0;
  margin: 14pt 0;
}
.mermaid-figure {
  margin: 12pt 0;
}
.mermaid-figure svg {
  max-width: 100%;
  height: auto;
}
/* Wide diagrams are promoted onto their own landscape page. */
.mermaid-landscape {
  break-before: page;
  page: mermaidLandscape;
}
@page mermaidLandscape {
  size: A4 landscape;
  margin: 20mm;
}

View file

@ -0,0 +1,4 @@
// This file is intentionally tiny; the heavy dependencies are loaded as file:// assets.
// It serves as a stable entrypoint for in-page render orchestration.
// (The actual work is executed by src/render_pdf.js via page.evaluate().)

38
worker/pdf/manifest.json Normal file
View file

@ -0,0 +1,38 @@
{
"versions": {
"worker": "0.1.0",
"node": "v20.19.6",
"puppeteer": "23.11.1",
"mermaid": "10.9.1",
"pagedjs": "0.4.3",
"markdownIt": "14.1.0",
"sanitizeHtml": "2.14.0",
"ibmPlexSans": "5.2.6",
"ibmPlexMono": "5.2.6",
"lockfileVersion": 3
},
"chromium": {
"sha256": "8b60627021064a6a1ecc3cf7f41d528747ea7f95713ca72b369833fa89638b0a",
"version": null
},
"code": {
"src/index.js": "e0add4a9e0adba7752b6a70b41dc9592aee937045ceba4e81b4ef2c2d5d6f935",
"src/render_pdf.js": "915fadbb9934fddf8f842d28459375e2b693eb6c646507cbeb7db05387038eb1",
"src/validate.js": "42fcc411306036168ad6ae68626d07cb6b5175814559f4055f98fc2925bfdb55",
"src/errors.js": "4afcb10eb0b5cfeaba69599e3d0d2d42cdc68655a93d20dad3a1508585df1529",
"src/logger.js": "7ec580c7154036fda4fa9617442a5d9ae5e1839139010c78e555e0c37b7f11a2",
"scripts/test-fixtures.js": "4694d98297ae73189b602c2d2d7941b8354e92e17ce4ba3c71f72db1a4408dd7",
"scripts/copy-assets.js": "e0b9075ebef4962bc36738136d9a5f99b6b58e959234e944fd4b4f429971fbac",
"scripts/generate-manifest.js": "711a9a1677728b44aeb80cb7f87c29ef04f1a74e9a46e1028b76878fbe25906b"
},
"assets": {
"assets/js/mermaid.min.js": "61b335a46df05a7ce1c98378f60e5f3e77a7fb608a1056997e8a649304a936d6",
"assets/js/paged.polyfill.js": "f59f361802416c770d549a647958649af2cf6601999924bc00e4f507dad5269f",
"assets/css/basic.css": "efcf1befdb7d9708981530cc0ec685154551712dbf369f05c3dbd61627315230",
"assets/css/professional.css": "02bd01ad088ba4d07e9a343823406e7a787222f12fbe6c528026a2f80c8106cc",
"assets/fonts/ibm-plex-sans-latin-400-normal.woff2": "3b646991d30055a93a4ecc499713d4347953a74a947ecab435ab72070cbdab0e",
"assets/fonts/ibm-plex-sans-latin-600-normal.woff2": "8960851d691c054ed38e259bdcf1a6190d157b4203ed5bb32c632a863fb8ec2f",
"assets/fonts/ibm-plex-mono-latin-400-normal.woff2": "3c5a451f9ec27a354b0c2bcca636c6ec17a651281aabf29f8427e210a1d31e85"
},
"manifest_sha": "e0e9ebe129de8fabcf2aab0a9509f248cd907d3684542e8d42e7e45b747cc956"
}

2950
worker/pdf/package-lock.json generated Normal file

File diff suppressed because it is too large Load diff

24
worker/pdf/package.json Normal file
View file

@ -0,0 +1,24 @@
{
"name": "forgejo-pdf-worker",
"version": "0.1.0",
"private": true,
"type": "commonjs",
"engines": {
"node": ">=20"
},
"scripts": {
"lint": "node -c src/index.js",
"copy-assets": "node scripts/copy-assets.js",
"generate-manifest": "node scripts/generate-manifest.js",
"test:fixtures": "node scripts/test-fixtures.js"
},
"dependencies": {
"@fontsource/ibm-plex-mono": "5.2.6",
"@fontsource/ibm-plex-sans": "5.2.6",
"markdown-it": "14.1.0",
"mermaid": "10.9.1",
"pagedjs": "0.4.3",
"puppeteer": "23.11.1",
"sanitize-html": "2.14.0"
}
}

View file

@ -0,0 +1,43 @@
const fs = require("node:fs");
const path = require("node:path");
function copyFile(src, dst) {
fs.mkdirSync(path.dirname(dst), { recursive: true });
fs.copyFileSync(src, dst);
}
// Resolve the on-disk root directory of an installed npm package.
// First try resolving "<pkg>/package.json" directly; when that subpath is
// not resolvable (e.g. the package's "exports" map does not expose it —
// NOTE(review): presumed reason, confirm per package), fall back to
// resolving the entry point and walking up at most 8 directory levels
// until a package.json is found.
function pkgRoot(pkg) {
  try {
    return path.dirname(require.resolve(`${pkg}/package.json`));
  } catch {
    const entry = require.resolve(pkg);
    let dir = path.dirname(entry);
    for (let i = 0; i < 8; i++) {
      if (fs.existsSync(path.join(dir, "package.json"))) return dir;
      dir = path.dirname(dir);
    }
    throw new Error(`Unable to locate package root: ${pkg}`);
  }
}
// Copy the runtime render assets (browser bundles and latin font subsets)
// out of node_modules into ./assets so the rendered page loads everything
// from local files — no network fetches at render time.
function main() {
  const root = path.resolve(__dirname, "..");
  const assets = path.join(root, "assets");
  // Mermaid bundle (browser).
  // Keep it local; no external fetches at runtime.
  copyFile(path.join(pkgRoot("mermaid"), "dist/mermaid.min.js"), path.join(assets, "js/mermaid.min.js"));
  // Paged.js bundle.
  copyFile(path.join(pkgRoot("pagedjs"), "dist/paged.polyfill.js"), path.join(assets, "js/paged.polyfill.js"));
  // Fonts: only include latin subsets required by the default CSS.
  const sansDir = path.join(pkgRoot("@fontsource/ibm-plex-sans"), "files");
  const monoDir = path.join(pkgRoot("@fontsource/ibm-plex-mono"), "files");
  copyFile(path.join(sansDir, "ibm-plex-sans-latin-400-normal.woff2"), path.join(assets, "fonts/ibm-plex-sans-latin-400-normal.woff2"));
  copyFile(path.join(sansDir, "ibm-plex-sans-latin-600-normal.woff2"), path.join(assets, "fonts/ibm-plex-sans-latin-600-normal.woff2"));
  copyFile(path.join(monoDir, "ibm-plex-mono-latin-400-normal.woff2"), path.join(assets, "fonts/ibm-plex-mono-latin-400-normal.woff2"));
}
main();

View file

@ -0,0 +1,88 @@
const crypto = require("node:crypto");
const fs = require("node:fs");
const path = require("node:path");
function sha256File(p) {
const h = crypto.createHash("sha256");
h.update(fs.readFileSync(p));
return h.digest("hex");
}
function sha256String(s) {
return crypto.createHash("sha256").update(s).digest("hex");
}
function tryReadJSON(p) {
try {
return JSON.parse(fs.readFileSync(p, "utf8"));
} catch {
return null;
}
}
// Build manifest.json: hashes of all worker code and assets plus pinned
// dependency versions, then a manifest_sha over the canonical (compact)
// JSON of everything above. The Go side pins this value as ManifestSHA for
// cache-key stability.
function main() {
  const root = path.resolve(__dirname, "..");
  const pkg = tryReadJSON(path.join(root, "package.json")) || {};
  const lock = tryReadJSON(path.join(root, "package-lock.json")) || {};
  // Per-file SHA-256 of every source/script that affects rendering.
  const code = {
    "src/index.js": sha256File(path.join(root, "src/index.js")),
    "src/render_pdf.js": sha256File(path.join(root, "src/render_pdf.js")),
    "src/validate.js": sha256File(path.join(root, "src/validate.js")),
    "src/errors.js": sha256File(path.join(root, "src/errors.js")),
    "src/logger.js": sha256File(path.join(root, "src/logger.js")),
    "scripts/test-fixtures.js": sha256File(path.join(root, "scripts/test-fixtures.js")),
    "scripts/copy-assets.js": sha256File(path.join(root, "scripts/copy-assets.js")),
    "scripts/generate-manifest.js": sha256File(path.join(root, "scripts/generate-manifest.js"))
  };
  // Per-file SHA-256 of the staged render assets (see copy-assets.js).
  const assets = {
    "assets/js/mermaid.min.js": sha256File(path.join(root, "assets/js/mermaid.min.js")),
    "assets/js/paged.polyfill.js": sha256File(path.join(root, "assets/js/paged.polyfill.js")),
    "assets/css/basic.css": sha256File(path.join(root, "assets/css/basic.css")),
    "assets/css/professional.css": sha256File(path.join(root, "assets/css/professional.css")),
    "assets/fonts/ibm-plex-sans-latin-400-normal.woff2": sha256File(path.join(root, "assets/fonts/ibm-plex-sans-latin-400-normal.woff2")),
    "assets/fonts/ibm-plex-sans-latin-600-normal.woff2": sha256File(path.join(root, "assets/fonts/ibm-plex-sans-latin-600-normal.woff2")),
    "assets/fonts/ibm-plex-mono-latin-400-normal.woff2": sha256File(path.join(root, "assets/fonts/ibm-plex-mono-latin-400-normal.woff2"))
  };
  // Best effort: record the Chromium binary hash/version when puppeteer is
  // installed; otherwise the chromium field stays null.
  let chromePath = null;
  let chromeSha = null;
  let chromeVersion = null;
  try {
    const puppeteer = require("puppeteer");
    chromePath = puppeteer.executablePath();
    chromeSha = sha256File(chromePath);
    if (typeof puppeteer.browserVersion === "function") {
      chromeVersion = puppeteer.browserVersion();
    }
  } catch {
    // noop
  }
  const deps = (pkg && pkg.dependencies) || {};
  const manifestCore = {
    versions: {
      worker: pkg.version || "0.0.0",
      node: process.version,
      puppeteer: deps.puppeteer || null,
      mermaid: deps.mermaid || null,
      pagedjs: deps.pagedjs || null,
      markdownIt: deps["markdown-it"] || null,
      sanitizeHtml: deps["sanitize-html"] || null,
      ibmPlexSans: deps["@fontsource/ibm-plex-sans"] || null,
      ibmPlexMono: deps["@fontsource/ibm-plex-mono"] || null,
      lockfileVersion: lock.lockfileVersion || null
    },
    chromium: chromePath ? { sha256: chromeSha, version: chromeVersion || null } : null,
    code,
    assets
  };
  // manifest_sha is computed over the compact JSON of the core object; the
  // file itself is written pretty-printed.
  const canonical = JSON.stringify(manifestCore);
  const manifest = { ...manifestCore, manifest_sha: sha256String(canonical) };
  fs.writeFileSync(path.join(root, "manifest.json"), JSON.stringify(manifest, null, 2));
}
main();

View file

@ -0,0 +1,133 @@
const fs = require("node:fs");
const path = require("node:path");
const { spawnSync, execFileSync } = require("node:child_process");
// Parse fixture-runner CLI flags (--fixtures, --out) into an options object,
// falling back to container-default paths when a flag is absent.
function parseArgs(argv) {
  const out = { fixtures: "/fixtures", outDir: "/tmp/pdf-fixtures-out" };
  let i = 2;
  while (i < argv.length) {
    const flag = argv[i++];
    if (flag === "--fixtures") out.fixtures = argv[i++];
    else if (flag === "--out") out.outDir = argv[i++];
  }
  return out;
}
// Read the worker image's manifest.json and return its manifest_sha field.
// Throws when the field is missing, empty, or not a string.
function readManifestSHA() {
  const manifestPath = "/opt/forgejo-pdf/manifest.json";
  const manifest = JSON.parse(fs.readFileSync(manifestPath, "utf8"));
  const sha = manifest.manifest_sha;
  if (!sha || typeof sha !== "string") {
    throw new Error("manifest.json missing manifest_sha");
  }
  return sha;
}
// Pull the expected marker token out of a fixture's Markdown body.
// Markers look like: "KNOWN_UNIQUE_STRING: some-token_1.2". Returns null
// when no marker is present.
function extractKnownUniqueString(md) {
  const match = /KNOWN_UNIQUE_STRING:\s*([A-Za-z0-9_\-\.]+)/.exec(md);
  return match === null ? null : match[1];
}
// True when the Markdown contains at least one closed ```mermaid fence.
function containsMermaid(md) {
  const fence = /```mermaid[\s\S]*?```/m;
  return fence.test(md);
}
function run(cmd, args, opts = {}) {
const res = spawnSync(cmd, args, { encoding: "utf8", ...opts });
return res;
}
// Drive the fixture suite: render every fixture Markdown file through the
// worker, then verify the sandbox/determinism invariants on each result:
//   - worker exits 0 and emits a structured "done" log line on stderr
//   - blocked_requests is 0 (no network access was even attempted)
//   - mermaid_count agrees with whether the fixture has mermaid fences
//   - the produced PDF is structurally valid (qpdf --check)
//   - pdftotext output contains the fixture's KNOWN_UNIQUE_STRING marker
function main() {
  const { fixtures, outDir } = parseArgs(process.argv);
  fs.mkdirSync(outDir, { recursive: true });
  const manifestSHA = readManifestSHA();
  // Mirrors the server-side defaults; the worker validates this shape.
  const pdfConfig = {
    pdf: {
      determinism: "strict",
      timestamp: "commit_time",
      typography: "professional",
      mermaid: { strategy: "balanced", caption: false },
      orphansWidows: { enforce: true },
      footer: { enabled: true }
    }
  };
  const files = fs
    .readdirSync(fixtures)
    .filter((f) => f.endsWith(".md"))
    .sort();
  if (files.length === 0) {
    throw new Error("no fixture markdown files found");
  }
  for (const f of files) {
    const mdPath = path.join(fixtures, f);
    const md = fs.readFileSync(mdPath, "utf8");
    const expected = extractKnownUniqueString(md);
    if (!expected) throw new Error(`fixture missing KNOWN_UNIQUE_STRING: ${f}`);
    const input = {
      markdown: md,
      repoMeta: {
        owner: "fixture",
        repo: "forgejo-pdf",
        path: f,
        repoID: 1,
        commitSHA: "0123456789abcdef0123456789abcdef01234567",
        commitTimeRFC3339: "2020-01-02T03:04:05Z"
      },
      config: pdfConfig,
      manifestSHA
    };
    const jobDir = fs.mkdtempSync(path.join(outDir, "job-"));
    const inPath = path.join(jobDir, "input.json");
    const outPath = path.join(jobDir, "output.pdf");
    fs.writeFileSync(inPath, JSON.stringify(input), "utf8");
    const res = run("node", ["src/index.js", "--in", inPath, "--out", outPath], {
      cwd: "/opt/forgejo-pdf"
    });
    if (res.status !== 0) {
      // When the spawn itself failed (e.g. ENOENT), stdout/stderr are null
      // and only res.error is populated — guard so we report, not crash.
      const detail =
        (res.stderr || "").trim() ||
        (res.stdout || "").trim() ||
        (res.error ? String(res.error) : "unknown spawn failure");
      throw new Error(`worker failed for ${f}: ${detail}`);
    }
    // Worker logs are NDJSON on stderr; ignore any non-JSON noise lines.
    const logs = res.stderr
      .split("\n")
      .map((l) => l.trim())
      .filter(Boolean)
      .map((l) => {
        try {
          return JSON.parse(l);
        } catch {
          return null;
        }
      })
      .filter(Boolean);
    // Array.prototype.findLast needs Node >= 18; the fallback must not
    // mutate `logs`, so reverse a copy.
    const done = logs.findLast
      ? logs.findLast((l) => l.event === "done")
      : logs.slice().reverse().find((l) => l.event === "done");
    if (!done) throw new Error(`missing done log for ${f}`);
    if (done.blocked_requests !== 0) throw new Error(`blocked_requests != 0 for ${f}`);
    const hasMermaid = containsMermaid(md);
    if (hasMermaid && (!Number.isFinite(done.mermaid_count) || done.mermaid_count < 1)) {
      throw new Error(`expected mermaid_count >= 1 for ${f}`);
    }
    if (!hasMermaid && done.mermaid_count !== 0) {
      throw new Error(`expected mermaid_count == 0 for ${f}`);
    }
    // Structural validity, then text extraction for the fixture marker.
    execFileSync("qpdf", ["--check", outPath], { stdio: "inherit" });
    const text = execFileSync("pdftotext", [outPath, "-"], { encoding: "utf8" });
    const normalized = text.replace(/\s+/g, "");
    const expectedNorm = expected.replace(/\s+/g, "");
    if (!normalized.includes(expectedNorm)) {
      throw new Error(`pdftotext missing expected marker for ${f}: ${expected}`);
    }
  }
}
main();

20
worker/pdf/src/errors.js Normal file
View file

@ -0,0 +1,20 @@
// Sanitized error catalogue shared by the worker. Only these frozen
// error_id/message pairs ever leave the sandbox in logs; an optional
// details_code carries a short machine-readable hint for operators.
const ERROR = Object.freeze({
  BAD_INPUT: { error_id: "ERR_BAD_INPUT", message: "Invalid input." },
  BAD_CONFIG: { error_id: "ERR_BAD_CONFIG", message: "Invalid configuration." },
  NETWORK_ATTEMPT: { error_id: "ERR_NETWORK_ATTEMPT", message: "External network access is not allowed." },
  MERMAID_NO_TEXT: { error_id: "ERR_MERMAID_NO_TEXT", message: "Mermaid output must contain selectable text." },
  MERMAID_FOREIGN_OBJECT: { error_id: "ERR_MERMAID_FOREIGN_OBJECT", message: "Mermaid output contains forbidden elements." },
  TIMEOUT: { error_id: "ERR_TIMEOUT", message: "Render timed out." },
  INTERNAL: { error_id: "ERR_INTERNAL", message: "Render failed." }
});
// Build a failure payload from a catalogue entry, attaching details_code
// only when a truthy hint is supplied.
function asFailure(errorDef, detailsCode) {
  const failure = {
    error_id: errorDef.error_id,
    message: errorDef.message
  };
  if (detailsCode) failure.details_code = detailsCode;
  return failure;
}
module.exports = { ERROR, asFailure };

73
worker/pdf/src/index.js Normal file
View file

@ -0,0 +1,73 @@
const fs = require("node:fs");
const path = require("node:path");
const { log } = require("./logger");
const { ERROR, asFailure } = require("./errors");
const { validateInput } = require("./validate");
const { renderToPDF } = require("./render_pdf");
// Parse the worker CLI: --in <input.json> --out <output.pdf>.
// Missing flags remain null so the caller can report a usage error.
function parseArgs(argv) {
  const args = { inPath: null, outPath: null };
  for (let i = 2; i < argv.length; i++) {
    switch (argv[i]) {
      case "--in":
        args.inPath = argv[++i];
        break;
      case "--out":
        args.outPath = argv[++i];
        break;
      default:
        break;
    }
  }
  return args;
}
// Worker entry point.
//
// Reads a JSON job from --in, renders it to a PDF at --out, and emits
// newline-delimited JSON log events on stderr. Exit codes:
//   0  success
//   1  render failure (sanitized error already logged)
//   2  bad CLI arguments / unreadable or structurally invalid input
async function main() {
  const { inPath, outPath } = parseArgs(process.argv);
  if (!inPath || !outPath) {
    log(asFailure(ERROR.BAD_INPUT, "missing_cli_args"));
    process.exitCode = 2;
    return;
  }
  let inputRaw;
  try {
    inputRaw = fs.readFileSync(inPath, "utf8");
  } catch {
    log(asFailure(ERROR.BAD_INPUT, "read_input_failed"));
    process.exitCode = 2;
    return;
  }
  let input;
  try {
    input = JSON.parse(inputRaw);
  } catch {
    log(asFailure(ERROR.BAD_INPUT, "parse_input_failed"));
    process.exitCode = 2;
    return;
  }
  const bad = validateInput(input);
  if (bad) {
    log(asFailure(bad, "validate_input_failed"));
    process.exitCode = 2;
    return;
  }
  const started = process.hrtime.bigint();
  log({ event: "start", repo: `${input.repoMeta.owner}/${input.repoMeta.repo}`, path: input.repoMeta.path });
  try {
    const { pdf, blockedRequests, mermaidCount } = await renderToPDF(input);
    fs.mkdirSync(path.dirname(outPath), { recursive: true });
    fs.writeFileSync(outPath, pdf);
    const elapsedMs = Number((process.hrtime.bigint() - started) / 1000000n);
    log({ event: "done", bytes: pdf.length, ms: elapsedMs, blocked_requests: blockedRequests, mermaid_count: mermaidCount });
  } catch (e) {
    // renderToPDF attaches a sanitized payload via safeError; log that
    // verbatim, never the raw exception.
    const safe = e && typeof e === "object" && e.safeError ? e.safeError : null;
    if (safe) {
      log(safe);
      process.exitCode = 1;
      return;
    }
    log(asFailure(ERROR.INTERNAL, "unhandled_exception"));
    process.exitCode = 1;
  }
}
// main() is async; without this catch an escaped rejection would print a
// raw stack trace instead of a sanitized NDJSON error.
main().catch(() => {
  log(asFailure(ERROR.INTERNAL, "unhandled_rejection"));
  process.exitCode = 1;
});

6
worker/pdf/src/logger.js Normal file
View file

@ -0,0 +1,6 @@
// Emit one structured log event as a single NDJSON line on stderr.
function log(obj) {
  const line = JSON.stringify(obj);
  process.stderr.write(line + "\n");
}
module.exports = { log };

View file

@ -0,0 +1,405 @@
const crypto = require("node:crypto");
const fs = require("node:fs");
const path = require("node:path");
const sanitizeHtml = require("sanitize-html");
const MarkdownIt = require("markdown-it");
const puppeteer = require("puppeteer");
const { ERROR, asFailure } = require("./errors");
// Throw an Error carrying a sanitized payload on .safeError, which the CLI
// entry point logs verbatim without exposing internals.
function safeThrow(errorDef, detailsCode) {
  const err = new Error(errorDef.error_id);
  err.safeError = asFailure(errorDef, detailsCode);
  throw err;
}
function sha256Hex(text) {
return crypto.createHash("sha256").update(text).digest("hex");
}
// Normalize CRLF and lone CR line endings to LF so downstream rendering
// and hashing are stable regardless of the source file's line endings.
function normalizeMarkdown(md) {
  return md.split("\r\n").join("\n").split("\r").join("\n");
}
// Convert Markdown to sanitized HTML.
//
// v0.1 policy: raw HTML in the Markdown is disabled, <img> is stripped, and
// anchor hrefs are restricted to fragment/relative targets — any form that
// could trigger network, script, filesystem, or mail access is removed
// (the anchor text is kept, the href is dropped).
function markdownToHTML(markdown) {
  const md = new MarkdownIt({
    html: false,
    linkify: true
  });
  const html = md.render(markdown);
  // Sanitize and remove images/external resources for v0.1.
  const cleaned = sanitizeHtml(html, {
    allowedTags: sanitizeHtml.defaults.allowedTags.filter((t) => t !== "img"),
    allowedAttributes: {
      a: ["href", "title"],
      code: ["class"],
      pre: ["class"],
      span: ["class"],
      p: ["class"],
      h1: ["id"],
      h2: ["id"],
      h3: ["id"],
      h4: ["id"],
      h5: ["id"],
      h6: ["id"]
    },
    allowVulnerableTags: false,
    transformTags: {
      a: (tagName, attribs) => {
        const href = attribs.href || "";
        const lower = href.trim().toLowerCase();
        const forbidden =
          lower.startsWith("http:") ||
          lower.startsWith("https:") ||
          lower.startsWith("ws:") ||
          lower.startsWith("wss:") ||
          lower.startsWith("mailto:") ||
          lower.startsWith("javascript:") ||
          lower.startsWith("data:") ||
          lower.startsWith("file:") ||
          // Protocol-relative URLs ("//host/...") inherit the page's
          // scheme — file:// in this worker — so block them as well.
          lower.startsWith("//");
        if (forbidden) return { tagName: "a", attribs: { rel: "nofollow" } };
        return { tagName, attribs: { href, rel: "nofollow" } };
      }
    }
  });
  return cleaned;
}
function htmlTemplate({ bodyHTML, assetsDir, cssName, footerCSS }) {
const cssPath = `file://${path.join(assetsDir, "css", cssName)}`;
const runtimePath = `file://${path.join(assetsDir, "js", "runtime.js")}`;
const mermaidPath = `file://${path.join(assetsDir, "js", "mermaid.min.js")}`;
const pagedPath = `file://${path.join(assetsDir, "js", "paged.polyfill.js")}`;
return `<!doctype html>
<html>
<head>
<meta charset="utf-8"/>
<meta name="viewport" content="width=device-width, initial-scale=1"/>
<link rel="stylesheet" href="${cssPath}"/>
<style>${footerCSS}</style>
</head>
<body>
<main id="content">${bodyHTML}</main>
<script src="${mermaidPath}"></script>
<script>window.PagedConfig = { auto: false };</script>
<script src="${pagedPath}"></script>
<script src="${runtimePath}"></script>
</body>
</html>`;
}
// Build the @page footer CSS: repo slug bottom-left, "<sha12> <timestamp>"
// bottom-center, page counter bottom-right. Returns "" when the footer is
// disabled in config.
function footerCSSFor(input) {
  const pdf = input.config.pdf;
  if (!pdf.footer.enabled) return "";
  const repo = `${input.repoMeta.owner}/${input.repoMeta.repo}`;
  const sha = input.repoMeta.commitSHA.slice(0, 12);
  let date = "";
  if (pdf.timestamp === "commit_time") {
    date = input.repoMeta.commitTimeRFC3339;
  } else {
    // Wall-clock time; validateInput only permits this outside strict mode.
    date = new Date().toISOString();
  }
  const left = cssString(repo);
  // Separate the SHA from the timestamp; previously they were concatenated
  // into one unreadable token ("<sha12><date>").
  const center = cssString(`${sha} ${date}`);
  return `
@page {
@bottom-left { content: "${left}"; }
@bottom-center { content: "${center}"; }
@bottom-right { content: counter(page); }
}`;
}
// Escape a JS string for embedding inside a double-quoted CSS string:
// backslashes and double quotes are backslash-escaped, and raw newlines
// (which are illegal inside CSS strings and would invalidate the whole
// @page rule) become the CSS newline escape "\A ".
function cssString(s) {
  return String(s)
    .replace(/\\/g, "\\\\")
    .replace(/"/g, '\\"')
    .replace(/\r\n|\r|\n/g, "\\A ");
}
// Render a validated job to a PDF inside headless Chromium.
//
// Pipeline: Markdown → sanitized HTML → static file on disk → Chromium page
// (Mermaid + Paged.js executed in-page) → page.pdf(). Every non-file://
// request is intercepted and aborted; any such attempt fails the render
// with ERR_NETWORK_ATTEMPT. Returns { pdf, blockedRequests, mermaidCount }.
// Failures are re-thrown with a sanitized .safeError payload.
async function renderToPDF(input) {
  const assetsDir = "/opt/forgejo-pdf/assets";
  const pdf = input.config.pdf;
  const cssName = pdf.typography === "basic" ? "basic.css" : "professional.css";
  const bodyHTML = markdownToHTML(normalizeMarkdown(input.markdown));
  const html = htmlTemplate({ bodyHTML, assetsDir, cssName, footerCSS: footerCSSFor(input) });
  const userDataDir = fs.mkdtempSync("/tmp/chrome-profile-");
  const pageDir = fs.mkdtempSync("/tmp/pdf-page-");
  const htmlPath = path.join(pageDir, "index.html");
  fs.writeFileSync(htmlPath, html, "utf8");
  const browser = await puppeteer.launch({
    headless: "new",
    args: [
      "--no-sandbox",
      "--disable-dev-shm-usage",
      "--allow-file-access-from-files",
      `--user-data-dir=${userDataDir}`
    ]
  });
  // Hoisted above the try block so the catch handler can read them.
  // Previously `lastPageError` was declared inside the try body, so the
  // catch's `typeof lastPageError` always saw an undeclared name and the
  // in-page error hint was silently lost.
  let lastPageError = null;
  let blockedRequests = 0;
  let blockedURL = null;
  try {
    const page = await browser.newPage();
    page.on("pageerror", (err) => {
      lastPageError = err && typeof err.message === "string" ? err.message : String(err);
    });
    page.on("console", (msg) => {
      if (msg.type && msg.type() === "error") {
        lastPageError = msg.text ? msg.text() : String(msg);
      }
    });
    // Abort everything except file:// — the sandbox must never touch the
    // network; count attempts so callers can assert blocked_requests == 0.
    await page.setRequestInterception(true);
    page.on("request", (req) => {
      const u = req.url();
      if (u.startsWith("file:")) return req.continue();
      blockedRequests++;
      if (!blockedURL) blockedURL = u;
      return req.abort();
    });
    await page.goto(`file://${htmlPath}`, { waitUntil: "load" });
    if (blockedRequests > 0) safeThrow(ERROR.NETWORK_ATTEMPT, `blocked:${blockedURL || "non_file_request"}`);
    // Mermaid + Paged.js rendering happens in-page.
    const renderResult = await page.evaluate(async (payload) => {
      try {
        // Minimal SHA-256 (hex) implementation for deterministic IDs.
        // Avoids relying on SubtleCrypto availability for file:// contexts.
        function sha256HexLocal(ascii) {
          function rightRotate(value, amount) {
            return (value >>> amount) | (value << (32 - amount));
          }
          // Normalize to a byte-string (UTF-8) for consistent hashing across unicode input.
          ascii = unescape(encodeURIComponent(ascii));
          const maxWord = Math.pow(2, 32);
          let result = "";
          const words = [];
          const asciiBitLength = ascii.length * 8;
          let hash = sha256HexLocal.h || [];
          let k = sha256HexLocal.k || [];
          let primeCounter = k.length;
          const isComposite = {};
          for (let candidate = 2; primeCounter < 64; candidate++) {
            if (!isComposite[candidate]) {
              for (let i = 0; i < 313; i += candidate) isComposite[i] = candidate;
              hash[primeCounter] = (Math.pow(candidate, 0.5) * maxWord) | 0;
              k[primeCounter++] = (Math.pow(candidate, 1 / 3) * maxWord) | 0;
            }
          }
          sha256HexLocal.h = hash;
          sha256HexLocal.k = k;
          ascii += "\x80";
          while ((ascii.length % 64) - 56) ascii += "\x00";
          for (let i = 0; i < ascii.length; i++) {
            const j = ascii.charCodeAt(i);
            words[i >> 2] |= j << ((3 - i) % 4) * 8;
          }
          words[words.length] = (asciiBitLength / maxWord) | 0;
          words[words.length] = asciiBitLength;
          for (let j = 0; j < words.length; ) {
            const w = words.slice(j, (j += 16));
            const oldHash = hash.slice(0);
            for (let i = 0; i < 64; i++) {
              const w15 = w[i - 15];
              const w2 = w[i - 2];
              const a = hash[0];
              const e = hash[4];
              const temp1 =
                hash[7] +
                (rightRotate(e, 6) ^ rightRotate(e, 11) ^ rightRotate(e, 25)) +
                ((e & hash[5]) ^ (~e & hash[6])) +
                k[i] +
                (w[i] =
                  i < 16
                    ? w[i]
                    : (w[i - 16] +
                        (rightRotate(w15, 7) ^ rightRotate(w15, 18) ^ (w15 >>> 3)) +
                        w[i - 7] +
                        (rightRotate(w2, 17) ^ rightRotate(w2, 19) ^ (w2 >>> 10))) |
                      0);
              const temp2 =
                (rightRotate(a, 2) ^ rightRotate(a, 13) ^ rightRotate(a, 22)) +
                ((a & hash[1]) ^ (a & hash[2]) ^ (hash[1] & hash[2]));
              hash = [(temp1 + temp2) | 0].concat(hash);
              hash[4] = (hash[4] + temp1) | 0;
              hash.pop();
            }
            for (let i = 0; i < 8; i++) {
              hash[i] = (hash[i] + oldHash[i]) | 0;
            }
          }
          for (let i = 0; i < 8; i++) {
            for (let j = 3; j + 1; j--) {
              const b = (hash[i] >> (j * 8)) & 255;
              result += (b < 16 ? "0" : "") + b.toString(16);
            }
          }
          return result;
        }
        // Strip scripts, event handlers, and non-fragment links from the
        // Mermaid SVG; reject foreignObject and text-free output outright.
        function sanitizeSVG(svgText) {
          const parser = new DOMParser();
          const doc = parser.parseFromString(svgText, "image/svg+xml");
          const svg = doc.documentElement;
          const forbidden = svg.querySelector("foreignObject");
          if (forbidden) throw { error_id: "ERR_MERMAID_FOREIGN_OBJECT", message: "Mermaid output contains forbidden elements." };
          const scripts = svg.querySelectorAll("script");
          for (const s of scripts) s.remove();
          for (const el of svg.querySelectorAll("*")) {
            for (const attr of Array.from(el.attributes)) {
              const name = attr.name;
              const value = attr.value || "";
              if (name.startsWith("on")) el.removeAttribute(name);
              if ((name === "href" || name === "xlink:href") && !value.startsWith("#")) el.removeAttribute(name);
            }
          }
          const hasText = svg.querySelector("text, tspan");
          if (!hasText) throw { error_id: "ERR_MERMAID_NO_TEXT", message: "Mermaid output must contain selectable text." };
          return svg.outerHTML;
        }
        // Wide diagrams (aspect ratio > 1.15 per viewBox) get a landscape
        // figure class so the stylesheet can rotate the page.
        function shouldLandscape(svgEl) {
          const vb = svgEl.getAttribute("viewBox");
          if (!vb) return false;
          const parts = vb.trim().split(/\s+/).map(Number);
          if (parts.length !== 4 || parts.some((n) => !Number.isFinite(n))) return false;
          const w = parts[2];
          const h = parts[3];
          if (h <= 0) return false;
          const aspect = w / h;
          return aspect > 1.15;
        }
        // Determinism guards.
        Math.random = () => 0.5;
        if (payload.pdf.determinism === "strict") {
          const fixed = Date.parse(payload.repoMeta.commitTimeRFC3339);
          if (Number.isFinite(fixed)) Date.now = () => fixed;
        }
        // Render Mermaid fences found in code blocks.
        const blocks = Array.from(document.querySelectorAll("pre > code"));
        const mermaidBlocks = blocks.filter((c) => (c.className || "").includes("language-mermaid"));
        if (typeof mermaid === "undefined" || !mermaid) throw { error_id: "ERR_INTERNAL", message: "Render failed." };
        mermaid.initialize({
          startOnLoad: false,
          securityLevel: "strict",
          htmlLabels: false,
          flowchart: { htmlLabels: false },
          sequence: { htmlLabels: false },
          state: { htmlLabels: false },
          class: { htmlLabels: false },
          fontFamily: "IBM Plex Sans",
          theme: "base",
          themeVariables: {
            fontFamily: "IBM Plex Sans",
            primaryColor: "#ffffff",
            primaryTextColor: "#111827",
            lineColor: "#6b7280",
            secondaryColor: "#f3f4f6",
            tertiaryColor: "#ffffff"
          }
        });
        for (let i = 0; i < mermaidBlocks.length; i++) {
          const codeEl = mermaidBlocks[i];
          const diagramText = codeEl.textContent || "";
          // Deterministic element ID derived from commit, path, index, and text.
          const id = `m-${sha256HexLocal(payload.repoMeta.commitSHA + "|" + payload.repoMeta.path + "|" + i + "|" + diagramText)}`;
          const { svg } = await mermaid.render(id, diagramText);
          const clean = sanitizeSVG(svg);
          const container = document.createElement("figure");
          container.className = "mermaid-figure";
          container.innerHTML = clean;
          const svgEl = container.querySelector("svg");
          if (svgEl && shouldLandscape(svgEl)) {
            container.classList.add("mermaid-landscape");
          }
          const pre = codeEl.parentElement;
          pre.replaceWith(container);
        }
        if (window.PagedPolyfill && window.PagedPolyfill.preview) {
          await window.PagedPolyfill.preview();
        }
        return { ok: true, mermaid_count: mermaidBlocks.length };
      } catch (err) {
        if (err && typeof err === "object" && err.error_id && err.message) {
          return { ok: false, error_id: err.error_id, message: err.message };
        }
        return { ok: false, error_id: "ERR_INTERNAL", message: "Render failed." };
      }
    }, { pdf: input.config.pdf, repoMeta: input.repoMeta });
    if (!renderResult || renderResult.ok !== true || typeof renderResult.mermaid_count !== "number") {
      if (renderResult && renderResult.ok === false && renderResult.error_id && renderResult.message) {
        const err = new Error(renderResult.error_id);
        err.safeError = { error_id: renderResult.error_id, message: renderResult.message };
        throw err;
      }
      safeThrow(ERROR.INTERNAL, "render_result_invalid");
    }
    const mermaidCount = renderResult.mermaid_count;
    const pdfBuf = await page.pdf({
      format: "A4",
      printBackground: true,
      preferCSSPageSize: true
    });
    return { pdf: Buffer.from(pdfBuf), blockedRequests, mermaidCount };
  } catch (e) {
    // Sanitized in-page errors pass through untouched.
    if (e && typeof e === "object" && e.error_id && e.message) {
      const err = new Error(e.error_id);
      err.safeError = { error_id: e.error_id, message: e.message };
      throw err;
    }
    if (e && typeof e === "object" && e.safeError) throw e;
    // Otherwise build a bounded, character-whitelisted hint from the
    // exception or the last page error captured by the listeners above.
    const msg = e && typeof e === "object" && typeof e.message === "string" ? e.message : "";
    const hint = msg || (typeof lastPageError === "string" ? lastPageError : "");
    const details = hint
      ? `render_failed:${hint.slice(0, 160).replace(/[^A-Za-z0-9_.:/\\-]+/g, "_")}`
      : "render_failed";
    safeThrow(ERROR.INTERNAL, details);
  } finally {
    try {
      await browser.close();
    } catch {}
    try {
      fs.rmSync(userDataDir, { recursive: true, force: true });
      fs.rmSync(pageDir, { recursive: true, force: true });
    } catch {}
  }
}
module.exports = { renderToPDF };

View file

@ -0,0 +1,37 @@
const { ERROR } = require("./errors");
// True for plain (non-array) objects; null, primitives, and arrays are
// rejected. Always returns a strict boolean — the previous version leaked
// the falsy operand itself (e.g. null) instead of false.
function isObject(value) {
  return Boolean(value) && typeof value === "object" && !Array.isArray(value);
}
// Validate the worker job payload. Returns the ERROR catalogue entry for
// the first failed check, or null when the payload is acceptable.
// Structural problems map to BAD_INPUT; recognized-but-invalid settings
// map to BAD_CONFIG. Check order is significant and preserved.
function validateInput(input) {
  const isString = (v) => typeof v === "string";
  const isBool = (v) => typeof v === "boolean";
  const oneOf = (value, allowed) => allowed.includes(value);

  if (!isObject(input)) return ERROR.BAD_INPUT;
  if (!isString(input.markdown)) return ERROR.BAD_INPUT;
  if (!isObject(input.repoMeta)) return ERROR.BAD_INPUT;
  if (!isObject(input.config) || !isObject(input.config.pdf)) return ERROR.BAD_INPUT;
  if (!isString(input.manifestSHA)) return ERROR.BAD_INPUT;

  const pdf = input.config.pdf;
  if (!oneOf(pdf.determinism, ["strict", "relaxed"])) return ERROR.BAD_CONFIG;
  if (!oneOf(pdf.timestamp, ["commit_time", "render_time"])) return ERROR.BAD_CONFIG;
  if (!oneOf(pdf.typography, ["basic", "professional"])) return ERROR.BAD_CONFIG;
  if (!isObject(pdf.mermaid)) return ERROR.BAD_CONFIG;
  if (!oneOf(pdf.mermaid.strategy, ["fast", "balanced", "prestige"])) return ERROR.BAD_CONFIG;
  if (!isBool(pdf.mermaid.caption)) return ERROR.BAD_CONFIG;
  if (!isObject(pdf.orphansWidows) || !isBool(pdf.orphansWidows.enforce)) return ERROR.BAD_CONFIG;
  if (!isObject(pdf.footer) || !isBool(pdf.footer.enabled)) return ERROR.BAD_CONFIG;
  // Strict determinism forbids wall-clock timestamps.
  if (pdf.determinism === "strict" && pdf.timestamp !== "commit_time") return ERROR.BAD_CONFIG;

  const rm = input.repoMeta;
  for (const field of ["owner", "repo", "path", "commitSHA", "commitTimeRFC3339"]) {
    if (!isString(rm[field])) return ERROR.BAD_INPUT;
  }
  return null;
}
module.exports = { validateInput };