Add server-side Markdown→PDF export (v0.1)
Some checks failed
pdfexport / pdfexport-worker-fixtures (push) Has been cancelled
Some checks failed
pdfexport / pdfexport-worker-fixtures (push) Has been cancelled
This commit is contained in:
parent
5ec1f7f363
commit
1ce1370983
31 changed files with 4698 additions and 1 deletions
20
.forgejo/workflows/pdfexport.yml
Normal file
20
.forgejo/workflows/pdfexport.yml
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
name: pdfexport
|
||||
|
||||
on:
|
||||
push:
|
||||
pull_request:
|
||||
|
||||
jobs:
|
||||
pdfexport-worker-fixtures:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Build worker image
|
||||
run: docker build -t forgejo/pdf-worker:v0.1 ./worker/pdf
|
||||
- name: Run fixtures (no network)
|
||||
run: |
|
||||
docker run --rm --network=none \
|
||||
-v "$PWD/tests/fixtures/pdfexport:/fixtures:ro" \
|
||||
forgejo/pdf-worker:v0.1 \
|
||||
node scripts/test-fixtures.js --fixtures /fixtures
|
||||
|
||||
51
modules/setting/pdf.go
Normal file
51
modules/setting/pdf.go
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package setting
|
||||
|
||||
import "strings"
|
||||
|
||||
// PDF holds server-side configuration for Markdown → PDF export.
|
||||
//
|
||||
// NOTE: The worker-facing config surface is represented in services/pdfexport and
|
||||
// is always marshaled with the exact JSON keys required by the worker contract.
|
||||
var PDF = struct {
|
||||
Enabled bool
|
||||
Determinism string
|
||||
Timestamp string
|
||||
Typography string
|
||||
|
||||
OrphansWidowsEnforce bool `ini:"ORPHANS_WIDOWS_ENFORCE"`
|
||||
FooterEnabled bool `ini:"FOOTER_ENABLED"`
|
||||
|
||||
Mermaid struct {
|
||||
Strategy string
|
||||
Caption bool
|
||||
}
|
||||
|
||||
// ContainerRuntime is the executable used to run the worker container.
|
||||
// Typical values: docker, podman.
|
||||
ContainerRuntime string `ini:"CONTAINER_RUNTIME"`
|
||||
// WorkerImage is the container image reference to run.
|
||||
WorkerImage string `ini:"WORKER_IMAGE"`
|
||||
}{
|
||||
Enabled: false,
|
||||
Determinism: "strict",
|
||||
Timestamp: "commit_time",
|
||||
Typography: "professional",
|
||||
OrphansWidowsEnforce: true,
|
||||
FooterEnabled: true,
|
||||
ContainerRuntime: "podman",
|
||||
WorkerImage: "localhost/forgejo/pdf-worker:v0.1",
|
||||
Mermaid: struct{ Strategy string; Caption bool }{Strategy: "balanced", Caption: false},
|
||||
}
|
||||
|
||||
func loadPDFFrom(rootCfg ConfigProvider) {
|
||||
mustMapSetting(rootCfg, "pdf", &PDF)
|
||||
// Allow nested mermaid configuration under [pdf.mermaid].
|
||||
mustMapSetting(rootCfg, "pdf.mermaid", &PDF.Mermaid)
|
||||
|
||||
PDF.Determinism = strings.ToLower(PDF.Determinism)
|
||||
PDF.Timestamp = strings.ToLower(PDF.Timestamp)
|
||||
PDF.Typography = strings.ToLower(PDF.Typography)
|
||||
PDF.Mermaid.Strategy = strings.ToLower(PDF.Mermaid.Strategy)
|
||||
}
|
||||
|
|
@ -155,6 +155,7 @@ func loadCommonSettingsFrom(cfg ConfigProvider) error {
|
|||
loadGitFrom(cfg)
|
||||
loadMirrorFrom(cfg)
|
||||
loadMarkupFrom(cfg)
|
||||
loadPDFFrom(cfg)
|
||||
loadQuotaFrom(cfg)
|
||||
loadOtherFrom(cfg)
|
||||
return nil
|
||||
|
|
|
|||
|
|
@ -1315,6 +1315,7 @@ n_release_few = %s releases
|
|||
released_this = released this
|
||||
file.title = %s at %s
|
||||
file_raw = Raw
|
||||
file_export_pdf = Export PDF
|
||||
file_follow = Follow symlink
|
||||
file_history = History
|
||||
file_view_source = View source
|
||||
|
|
@ -3925,4 +3926,3 @@ filepreview.truncated = Preview has been truncated
|
|||
[translation_meta]
|
||||
test = This is a test string. It is not displayed in Forgejo UI but is used for testing purposes. Feel free to enter "ok" to save time (or a fun fact of your choice) to hit that sweet 100% completion mark :)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ package repo
|
|||
|
||||
import (
|
||||
"path"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
git_model "code.gitea.io/gitea/models/git"
|
||||
|
|
@ -17,6 +18,7 @@ import (
|
|||
"code.gitea.io/gitea/modules/storage"
|
||||
"code.gitea.io/gitea/routers/common"
|
||||
"code.gitea.io/gitea/services/context"
|
||||
"code.gitea.io/gitea/services/pdfexport"
|
||||
)
|
||||
|
||||
// ServeBlobOrLFS download a git.Blob redirecting to LFS if necessary
|
||||
|
|
@ -115,6 +117,11 @@ func getBlobForEntry(ctx *context.Context) (blob *git.Blob, lastModified *time.T
|
|||
|
||||
// SingleDownload download a file by repos path
|
||||
func SingleDownload(ctx *context.Context) {
|
||||
if strings.EqualFold(ctx.FormString("format"), "pdf") {
|
||||
pdfexport.ServeMarkdownPDF(ctx)
|
||||
return
|
||||
}
|
||||
|
||||
blob, lastModified := getBlobForEntry(ctx)
|
||||
if blob == nil {
|
||||
return
|
||||
|
|
|
|||
|
|
@ -393,6 +393,10 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry) {
|
|||
ctx.Data["FileIsSymlink"] = entry.IsLink()
|
||||
ctx.Data["FileName"] = blob.Name()
|
||||
ctx.Data["RawFileLink"] = ctx.Repo.RepoLink + "/raw/" + ctx.Repo.BranchNameSubURL() + "/" + util.PathEscapeSegments(ctx.Repo.TreePath)
|
||||
ctx.Data["PDFExportLink"] = ""
|
||||
if setting.PDF.Enabled && markup.IsMarkupFile(blob.Name(), "markdown") {
|
||||
ctx.Data["PDFExportLink"] = ctx.Repo.RepoLink + "/raw/" + util.PathEscapeSegments(ctx.Repo.TreePath) + "?ref=" + url.QueryEscape(ctx.Repo.CommitID) + "&format=pdf"
|
||||
}
|
||||
|
||||
if entry.IsLink() {
|
||||
_, link, err := entry.FollowLinks()
|
||||
|
|
|
|||
65
services/pdfexport/config.go
Normal file
65
services/pdfexport/config.go
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package pdfexport
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"strings"
|
||||
|
||||
"code.gitea.io/gitea/modules/setting"
|
||||
)
|
||||
|
||||
func configFromSetting() (pdfConfig, userError) {
|
||||
cfg := pdfConfig{
|
||||
Determinism: strings.ToLower(setting.PDF.Determinism),
|
||||
Timestamp: strings.ToLower(setting.PDF.Timestamp),
|
||||
Typography: strings.ToLower(setting.PDF.Typography),
|
||||
}
|
||||
cfg.Mermaid.Strategy = strings.ToLower(setting.PDF.Mermaid.Strategy)
|
||||
cfg.Mermaid.Caption = setting.PDF.Mermaid.Caption
|
||||
cfg.OrphansWidows.Enforce = setting.PDF.OrphansWidowsEnforce
|
||||
cfg.Footer.Enabled = setting.PDF.FooterEnabled
|
||||
|
||||
// Defaults if empty/misconfigured.
|
||||
if cfg.Determinism == "" {
|
||||
cfg.Determinism = "strict"
|
||||
}
|
||||
if cfg.Timestamp == "" {
|
||||
cfg.Timestamp = "commit_time"
|
||||
}
|
||||
if cfg.Typography == "" {
|
||||
cfg.Typography = "professional"
|
||||
}
|
||||
if cfg.Mermaid.Strategy == "" {
|
||||
cfg.Mermaid.Strategy = "balanced"
|
||||
}
|
||||
|
||||
if cfg.Determinism != "strict" && cfg.Determinism != "relaxed" {
|
||||
return pdfConfig{}, errBadRequest("ERR_PDF_CONFIG_INVALID", "Invalid PDF configuration.")
|
||||
}
|
||||
if cfg.Timestamp != "commit_time" && cfg.Timestamp != "render_time" {
|
||||
return pdfConfig{}, errBadRequest("ERR_PDF_CONFIG_INVALID", "Invalid PDF configuration.")
|
||||
}
|
||||
if cfg.Typography != "basic" && cfg.Typography != "professional" {
|
||||
return pdfConfig{}, errBadRequest("ERR_PDF_CONFIG_INVALID", "Invalid PDF configuration.")
|
||||
}
|
||||
if cfg.Mermaid.Strategy != "fast" && cfg.Mermaid.Strategy != "balanced" && cfg.Mermaid.Strategy != "prestige" {
|
||||
return pdfConfig{}, errBadRequest("ERR_PDF_CONFIG_INVALID", "Invalid PDF configuration.")
|
||||
}
|
||||
|
||||
// Strict implies timestamp=commit_time only.
|
||||
if cfg.Determinism == "strict" && cfg.Timestamp != "commit_time" {
|
||||
return pdfConfig{}, errBadRequest("ERR_PDF_CONFIG_INVALID", "Invalid PDF configuration.")
|
||||
}
|
||||
|
||||
return cfg, userError{}
|
||||
}
|
||||
|
||||
func configHash(cfg pdfConfig) (string, error) {
|
||||
b, err := json.Marshal(cfg)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return sha256Hex(b), nil
|
||||
}
|
||||
|
||||
28
services/pdfexport/errors.go
Normal file
28
services/pdfexport/errors.go
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package pdfexport
|
||||
|
||||
import "fmt"
|
||||
|
||||
type userError struct {
|
||||
Status int
|
||||
ErrorID string
|
||||
Message string
|
||||
}
|
||||
|
||||
func (e userError) Error() string {
|
||||
return fmt.Sprintf("%s: %s", e.ErrorID, e.Message)
|
||||
}
|
||||
|
||||
func errBadRequest(errorID, message string) userError {
|
||||
return userError{Status: 400, ErrorID: errorID, Message: message}
|
||||
}
|
||||
|
||||
func errNotFound(errorID, message string) userError {
|
||||
return userError{Status: 404, ErrorID: errorID, Message: message}
|
||||
}
|
||||
|
||||
func errInternal(errorID, message string) userError {
|
||||
return userError{Status: 500, ErrorID: errorID, Message: message}
|
||||
}
|
||||
|
||||
148
services/pdfexport/runner.go
Normal file
148
services/pdfexport/runner.go
Normal file
|
|
@ -0,0 +1,148 @@
|
|||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package pdfexport
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
gocontext "context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"code.gitea.io/gitea/modules/log"
|
||||
"code.gitea.io/gitea/modules/setting"
|
||||
)
|
||||
|
||||
const (
|
||||
defaultWorkerTimeout = 60 * time.Second
|
||||
maxWorkerStderrBytes = 256 * 1024
|
||||
)
|
||||
|
||||
func runWorker(input workerInput) ([]byte, userError) {
|
||||
if !setting.PDF.Enabled {
|
||||
return nil, errBadRequest("ERR_PDF_DISABLED", "PDF export is disabled.")
|
||||
}
|
||||
|
||||
jobDir, err := os.MkdirTemp("", "forgejo-pdfexport-job-*")
|
||||
if err != nil {
|
||||
return nil, errInternal("ERR_PDF_INTERNAL", "Failed to export PDF.")
|
||||
}
|
||||
defer func() {
|
||||
if err := os.RemoveAll(jobDir); err != nil {
|
||||
log.Error("pdfexport: remove job dir: %v", err)
|
||||
}
|
||||
}()
|
||||
|
||||
inPath := filepath.Join(jobDir, "input.json")
|
||||
outPath := filepath.Join(jobDir, "output.pdf")
|
||||
|
||||
inBytes, err := json.Marshal(input)
|
||||
if err != nil {
|
||||
return nil, errInternal("ERR_PDF_INTERNAL", "Failed to export PDF.")
|
||||
}
|
||||
if err := os.WriteFile(inPath, inBytes, 0o600); err != nil {
|
||||
return nil, errInternal("ERR_PDF_INTERNAL", "Failed to export PDF.")
|
||||
}
|
||||
|
||||
runtime := setting.PDF.ContainerRuntime
|
||||
if runtime == "" {
|
||||
runtime = "podman"
|
||||
}
|
||||
image := setting.PDF.WorkerImage
|
||||
if image == "" {
|
||||
image = "localhost/forgejo/pdf-worker:v0.1"
|
||||
}
|
||||
|
||||
ctx, cancel := gocontext.WithTimeout(gocontext.Background(), defaultWorkerTimeout)
|
||||
defer cancel()
|
||||
|
||||
args := []string{
|
||||
"run", "--rm",
|
||||
"--network=none",
|
||||
"--read-only",
|
||||
"--cap-drop=ALL",
|
||||
"--security-opt=no-new-privileges",
|
||||
"--tmpfs", "/tmp:rw,noexec,nosuid,size=1024m",
|
||||
"--volume", fmt.Sprintf("%s:/job:rw", jobDir),
|
||||
"--memory", "2g",
|
||||
"--cpus", "2",
|
||||
image,
|
||||
"node", "src/index.js",
|
||||
"--in", "/job/input.json",
|
||||
"--out", "/job/output.pdf",
|
||||
}
|
||||
if strings.Contains(runtime, "podman") {
|
||||
// Proxmox/LXC deployments often confine AppArmor and can block the default container profile load.
|
||||
args = append(args[:6], append([]string{"--security-opt=apparmor=unconfined"}, args[6:]...)...)
|
||||
}
|
||||
|
||||
cmd := exec.CommandContext(ctx, runtime, args...)
|
||||
cmd.Stdout = io.Discard
|
||||
var stderr bytes.Buffer
|
||||
cmd.Stderr = &limitedWriter{W: &stderr, N: maxWorkerStderrBytes}
|
||||
|
||||
if err := cmd.Run(); err != nil {
|
||||
if errors.Is(ctx.Err(), gocontext.DeadlineExceeded) {
|
||||
return nil, errInternal("ERR_PDF_TIMEOUT", "PDF export timed out.")
|
||||
}
|
||||
// Best-effort parse of worker error envelope from stderr JSONL.
|
||||
if ue, ok := parseWorkerError(stderr.String()); ok {
|
||||
return nil, ue
|
||||
}
|
||||
log.Error("pdfexport: worker failed: %v", err)
|
||||
return nil, errInternal("ERR_PDF_WORKER_FAILED", "Failed to export PDF.")
|
||||
}
|
||||
|
||||
pdf, err := os.ReadFile(outPath)
|
||||
if err != nil || len(pdf) == 0 {
|
||||
return nil, errInternal("ERR_PDF_WORKER_NO_OUTPUT", "Failed to export PDF.")
|
||||
}
|
||||
return pdf, userError{}
|
||||
}
|
||||
|
||||
type limitedWriter struct {
|
||||
W io.Writer
|
||||
N int
|
||||
}
|
||||
|
||||
func (w *limitedWriter) Write(p []byte) (int, error) {
|
||||
if w.N <= 0 {
|
||||
return len(p), nil
|
||||
}
|
||||
if len(p) > w.N {
|
||||
p = p[:w.N]
|
||||
}
|
||||
n, err := w.W.Write(p)
|
||||
w.N -= n
|
||||
return n, err
|
||||
}
|
||||
|
||||
func parseWorkerError(stderr string) (userError, bool) {
|
||||
type workerErr struct {
|
||||
ErrorID string `json:"error_id"`
|
||||
Message string `json:"message"`
|
||||
}
|
||||
|
||||
lines := strings.Split(stderr, "\n")
|
||||
for i := len(lines) - 1; i >= 0; i-- {
|
||||
line := strings.TrimSpace(lines[i])
|
||||
if line == "" {
|
||||
continue
|
||||
}
|
||||
var we workerErr
|
||||
if err := json.Unmarshal([]byte(line), &we); err != nil {
|
||||
continue
|
||||
}
|
||||
if we.ErrorID == "" || we.Message == "" {
|
||||
continue
|
||||
}
|
||||
return errInternal(we.ErrorID, we.Message), true
|
||||
}
|
||||
return userError{}, false
|
||||
}
|
||||
235
services/pdfexport/service.go
Normal file
235
services/pdfexport/service.go
Normal file
|
|
@ -0,0 +1,235 @@
|
|||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package pdfexport
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"code.gitea.io/gitea/modules/git"
|
||||
"code.gitea.io/gitea/modules/log"
|
||||
"code.gitea.io/gitea/modules/markup"
|
||||
"code.gitea.io/gitea/modules/setting"
|
||||
"code.gitea.io/gitea/services/context"
|
||||
)
|
||||
|
||||
const (
|
||||
maxMarkdownBytes = 4 << 20
|
||||
|
||||
// ManifestSHA must be kept in sync with worker/pdf/manifest.json for cache key stability.
|
||||
// It is used as part of the worker input and cache key (v0.1 determinism requirement).
|
||||
ManifestSHA = "e0e9ebe129de8fabcf2aab0a9509f248cd907d3684542e8d42e7e45b747cc956"
|
||||
)
|
||||
|
||||
// ServeMarkdownPDF handles GET /{owner}/{repo}/raw/{path}?ref=<sha>&format=pdf
|
||||
// It returns either PDF bytes or a safe JSON error envelope.
|
||||
func ServeMarkdownPDF(ctx *context.Context) {
|
||||
if strings.ToLower(ctx.FormString("format")) != "pdf" {
|
||||
ctx.Status(http.StatusNotFound)
|
||||
return
|
||||
}
|
||||
|
||||
// Determine effective file path and commit.
|
||||
effectivePath, commit, ue := resolvePathAndCommit(ctx)
|
||||
if ue.ErrorID != "" {
|
||||
writeUserError(ctx, ue)
|
||||
return
|
||||
}
|
||||
|
||||
if !markup.IsMarkupFile(path.Base(effectivePath), "markdown") {
|
||||
writeUserError(ctx, errBadRequest("ERR_PDF_NOT_MARKDOWN", "PDF export is only supported for Markdown files."))
|
||||
return
|
||||
}
|
||||
|
||||
entry, err := commit.GetTreeEntryByPath(effectivePath)
|
||||
if err != nil {
|
||||
if git.IsErrNotExist(err) {
|
||||
writeUserError(ctx, errNotFound("ERR_PDF_NOT_FOUND", "File not found."))
|
||||
return
|
||||
}
|
||||
log.Error("pdfexport: GetTreeEntryByPath: %v", err)
|
||||
writeUserError(ctx, errInternal("ERR_PDF_INTERNAL", "Failed to export PDF."))
|
||||
return
|
||||
}
|
||||
if entry.IsDir() || entry.IsSubModule() {
|
||||
writeUserError(ctx, errBadRequest("ERR_PDF_NOT_MARKDOWN", "PDF export is only supported for Markdown files."))
|
||||
return
|
||||
}
|
||||
|
||||
blob := entry.Blob()
|
||||
rc, err := blob.DataAsync()
|
||||
if err != nil {
|
||||
log.Error("pdfexport: blob.DataAsync: %v", err)
|
||||
writeUserError(ctx, errInternal("ERR_PDF_INTERNAL", "Failed to export PDF."))
|
||||
return
|
||||
}
|
||||
defer func() { _ = rc.Close() }()
|
||||
|
||||
md, ue := readBounded(rc, maxMarkdownBytes)
|
||||
if ue.ErrorID != "" {
|
||||
writeUserError(ctx, ue)
|
||||
return
|
||||
}
|
||||
|
||||
cfg, ue := configFromSetting()
|
||||
if ue.ErrorID != "" {
|
||||
writeUserError(ctx, ue)
|
||||
return
|
||||
}
|
||||
cfgHash, err := configHash(cfg)
|
||||
if err != nil {
|
||||
log.Error("pdfexport: config hash: %v", err)
|
||||
writeUserError(ctx, errInternal("ERR_PDF_INTERNAL", "Failed to export PDF."))
|
||||
return
|
||||
}
|
||||
|
||||
cacheKey := fmt.Sprintf("%d|%s|%s|%s|%s", ctx.Repo.Repository.ID, commit.ID.String(), effectivePath, cfgHash, ManifestSHA)
|
||||
pdf, cacheHit, ue := getOrGeneratePDF(cacheKey, workerInput{
|
||||
Markdown: string(md),
|
||||
RepoMeta: workerRepoMeta{
|
||||
Owner: ctx.Repo.Repository.OwnerName,
|
||||
Repo: ctx.Repo.Repository.Name,
|
||||
Path: effectivePath,
|
||||
RepoID: ctx.Repo.Repository.ID,
|
||||
CommitSHA: commit.ID.String(),
|
||||
CommitTimeRFC3339: commit.Committer.When.UTC().Format(time.RFC3339),
|
||||
},
|
||||
Config: workerConfigWrap{PDF: cfg},
|
||||
ManifestSHA: ManifestSHA,
|
||||
})
|
||||
if ue.ErrorID != "" {
|
||||
writeUserError(ctx, ue)
|
||||
return
|
||||
}
|
||||
|
||||
filename := path.Base(effectivePath)
|
||||
if strings.HasSuffix(strings.ToLower(filename), ".md") {
|
||||
filename = filename[:len(filename)-3]
|
||||
}
|
||||
if filename == "" {
|
||||
filename = "document"
|
||||
}
|
||||
filename += ".pdf"
|
||||
|
||||
ctx.Resp.Header().Set("Content-Type", "application/pdf")
|
||||
ctx.Resp.Header().Set("X-Content-Type-Options", "nosniff")
|
||||
ctx.Resp.Header().Set("Content-Disposition", fmt.Sprintf("inline; filename=%q", filename))
|
||||
if cacheHit {
|
||||
ctx.Resp.Header().Set("X-Forgejo-PDF-Cache", "HIT")
|
||||
} else {
|
||||
ctx.Resp.Header().Set("X-Forgejo-PDF-Cache", "MISS")
|
||||
}
|
||||
ctx.Resp.WriteHeader(http.StatusOK)
|
||||
_, _ = ctx.Resp.Write(pdf)
|
||||
}
|
||||
|
||||
func resolvePathAndCommit(ctx *context.Context) (string, *git.Commit, userError) {
|
||||
if ctx.Repo.Repository == nil || ctx.Repo.GitRepo == nil {
|
||||
return "", nil, errInternal("ERR_PDF_INTERNAL", "Failed to export PDF.")
|
||||
}
|
||||
|
||||
ref := strings.TrimSpace(ctx.FormString("ref"))
|
||||
commit := ctx.Repo.Commit
|
||||
if ref != "" {
|
||||
c, err := ctx.Repo.GitRepo.GetCommit(ref)
|
||||
if err != nil {
|
||||
if git.IsErrNotExist(err) {
|
||||
return "", nil, errNotFound("ERR_PDF_REF_NOT_FOUND", "Commit not found.")
|
||||
}
|
||||
log.Error("pdfexport: GetCommit(ref): %v", err)
|
||||
return "", nil, errInternal("ERR_PDF_INTERNAL", "Failed to export PDF.")
|
||||
}
|
||||
commit = c
|
||||
}
|
||||
if commit == nil {
|
||||
return "", nil, errInternal("ERR_PDF_INTERNAL", "Failed to export PDF.")
|
||||
}
|
||||
|
||||
// For the legacy /raw/* route with an explicit ref query parameter, treat the wildcard as the filepath
|
||||
// (avoid legacy ref guessing for deterministic cache keys).
|
||||
effectivePath := ctx.Repo.TreePath
|
||||
if ref != "" && isRawLegacyPath(ctx) {
|
||||
effectivePath = strings.TrimPrefix(ctx.Params("*"), "/")
|
||||
}
|
||||
if effectivePath == "" {
|
||||
return "", nil, errBadRequest("ERR_PDF_BAD_REQUEST", "Invalid request.")
|
||||
}
|
||||
return effectivePath, commit, userError{}
|
||||
}
|
||||
|
||||
func isRawLegacyPath(ctx *context.Context) bool {
|
||||
p := ctx.Req.URL.Path
|
||||
idx := strings.Index(p, "/raw/")
|
||||
if idx < 0 {
|
||||
return false
|
||||
}
|
||||
after := p[idx+len("/raw/"):]
|
||||
return !(strings.HasPrefix(after, "branch/") || strings.HasPrefix(after, "tag/") || strings.HasPrefix(after, "commit/") || strings.HasPrefix(after, "blob/"))
|
||||
}
|
||||
|
||||
func readBounded(r io.Reader, limit int64) ([]byte, userError) {
|
||||
lr := io.LimitReader(r, limit+1)
|
||||
b, err := io.ReadAll(lr)
|
||||
if err != nil {
|
||||
return nil, errInternal("ERR_PDF_INTERNAL", "Failed to export PDF.")
|
||||
}
|
||||
if int64(len(b)) > limit {
|
||||
return nil, errBadRequest("ERR_PDF_TOO_LARGE", "File is too large to export.")
|
||||
}
|
||||
return b, userError{}
|
||||
}
|
||||
|
||||
func getOrGeneratePDF(cacheKey string, input workerInput) ([]byte, bool, userError) {
|
||||
cacheDir := filepath.Join(setting.AppDataPath, "pdfexport", "cache")
|
||||
if err := os.MkdirAll(cacheDir, 0o750); err != nil {
|
||||
log.Error("pdfexport: mkdir cache: %v", err)
|
||||
return nil, false, errInternal("ERR_PDF_INTERNAL", "Failed to export PDF.")
|
||||
}
|
||||
cacheFile := filepath.Join(cacheDir, sha256Hex([]byte(cacheKey))+".pdf")
|
||||
|
||||
if b, err := os.ReadFile(cacheFile); err == nil && len(b) > 0 {
|
||||
return b, true, userError{}
|
||||
}
|
||||
|
||||
pdf, ue := runWorker(input)
|
||||
if ue.ErrorID != "" {
|
||||
return nil, false, ue
|
||||
}
|
||||
|
||||
tmp := cacheFile + ".tmp"
|
||||
if err := os.WriteFile(tmp, pdf, 0o640); err == nil {
|
||||
_ = os.Rename(tmp, cacheFile)
|
||||
} else {
|
||||
log.Error("pdfexport: write cache: %v", err)
|
||||
}
|
||||
|
||||
return pdf, false, userError{}
|
||||
}
|
||||
|
||||
func sha256Hex(b []byte) string {
|
||||
h := sha256.Sum256(b)
|
||||
return hex.EncodeToString(h[:])
|
||||
}
|
||||
|
||||
func writeUserError(ctx *context.Context, ue userError) {
|
||||
status := ue.Status
|
||||
if status == 0 {
|
||||
status = http.StatusInternalServerError
|
||||
}
|
||||
ctx.Resp.Header().Set("Content-Type", "application/json; charset=utf-8")
|
||||
ctx.Resp.Header().Set("X-Content-Type-Options", "nosniff")
|
||||
ctx.Resp.WriteHeader(status)
|
||||
_ = json.NewEncoder(ctx.Resp).Encode(map[string]string{
|
||||
"error_id": ue.ErrorID,
|
||||
"message": ue.Message,
|
||||
})
|
||||
}
|
||||
45
services/pdfexport/types.go
Normal file
45
services/pdfexport/types.go
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
// SPDX-License-Identifier: MIT
|
||||
|
||||
package pdfexport
|
||||
|
||||
type workerInput struct {
|
||||
Markdown string `json:"markdown"`
|
||||
RepoMeta workerRepoMeta `json:"repoMeta"`
|
||||
Config workerConfigWrap `json:"config"`
|
||||
|
||||
ManifestSHA string `json:"manifestSHA"`
|
||||
}
|
||||
|
||||
type workerRepoMeta struct {
|
||||
Owner string `json:"owner"`
|
||||
Repo string `json:"repo"`
|
||||
Path string `json:"path"`
|
||||
RepoID interface{} `json:"repoID"`
|
||||
CommitSHA string `json:"commitSHA"`
|
||||
CommitTimeRFC3339 string `json:"commitTimeRFC3339"`
|
||||
}
|
||||
|
||||
type workerConfigWrap struct {
|
||||
PDF pdfConfig `json:"pdf"`
|
||||
}
|
||||
|
||||
// pdfConfig matches the required worker config surface (JSON keys and values).
|
||||
type pdfConfig struct {
|
||||
Determinism string `json:"determinism"` // strict|relaxed
|
||||
Timestamp string `json:"timestamp"` // commit_time|render_time
|
||||
Typography string `json:"typography"` // basic|professional
|
||||
|
||||
Mermaid struct {
|
||||
Strategy string `json:"strategy"` // fast|balanced|prestige
|
||||
Caption bool `json:"caption"`
|
||||
} `json:"mermaid"`
|
||||
|
||||
OrphansWidows struct {
|
||||
Enforce bool `json:"enforce"`
|
||||
} `json:"orphansWidows"`
|
||||
|
||||
Footer struct {
|
||||
Enabled bool `json:"enabled"`
|
||||
} `json:"footer"`
|
||||
}
|
||||
|
||||
|
|
@ -47,6 +47,9 @@
|
|||
<a class="ui mini basic button" href="{{$.SymlinkURL}}" data-kind="follow-symlink">{{ctx.Locale.Tr "repo.file_follow"}}</a>
|
||||
{{end}}
|
||||
<a class="ui mini basic button" href="{{$.RawFileLink}}">{{ctx.Locale.Tr "repo.file_raw"}}</a>
|
||||
{{if and $.PDFExportLink .IsMarkup (eq .MarkupType "markdown") .IsDisplayingRendered}}
|
||||
<a class="ui mini basic button" href="{{$.PDFExportLink}}">{{ctx.Locale.Tr "repo.file_export_pdf"}}</a>
|
||||
{{end}}
|
||||
{{if not .IsViewCommit}}
|
||||
<a class="ui mini basic button" href="{{.RepoLink}}/src/commit/{{PathEscape .CommitID}}/{{PathEscapeSegments .TreePath}}">{{ctx.Locale.Tr "repo.file_permalink"}}</a>
|
||||
{{end}}
|
||||
|
|
|
|||
17
tests/fixtures/pdfexport/basic.md
vendored
Normal file
17
tests/fixtures/pdfexport/basic.md
vendored
Normal file
|
|
@ -0,0 +1,17 @@
|
|||
# PDF Export Fixture — Basic
|
||||
|
||||
KNOWN_UNIQUE_STRING: PDFX_BASIC_6d7d6fdd
|
||||
|
||||
This fixture ensures the PDF contains selectable text, including headings, paragraphs, lists and code.
|
||||
|
||||
## List
|
||||
|
||||
- Alpha
|
||||
- Beta
|
||||
- Gamma
|
||||
|
||||
## Code
|
||||
|
||||
```bash
|
||||
echo "hello"
|
||||
```
|
||||
11
tests/fixtures/pdfexport/external_resources.md
vendored
Normal file
11
tests/fixtures/pdfexport/external_resources.md
vendored
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
# PDF Export Fixture — External Resources Blocked
|
||||
|
||||
KNOWN_UNIQUE_STRING: PDFX_EXT_25d1c31b
|
||||
|
||||
This fixture contains external resources which must not be fetched in v0.1.
|
||||
|
||||

|
||||
|
||||
[external-link](https://example.com/)
|
||||
|
||||
<script>alert("raw html disabled")</script>
|
||||
12
tests/fixtures/pdfexport/mermaid.md
vendored
Normal file
12
tests/fixtures/pdfexport/mermaid.md
vendored
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
# PDF Export Fixture — Mermaid
|
||||
|
||||
KNOWN_UNIQUE_STRING: PDFX_MERMAID_2a2dbf1d
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
A[Alpha] --> B[Beta]
|
||||
B --> C[Gamma]
|
||||
C --> D[Delta]
|
||||
```
|
||||
|
||||
The diagram labels must remain selectable PDF text.
|
||||
16
tests/fixtures/pdfexport/mermaid_wide.md
vendored
Normal file
16
tests/fixtures/pdfexport/mermaid_wide.md
vendored
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
# PDF Export Fixture — Mermaid Wide
|
||||
|
||||
KNOWN_UNIQUE_STRING: PDFX_WIDE_bf3a6c2e
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
A[Start] --> B[Step 1]
|
||||
B --> C[Step 2]
|
||||
C --> D[Step 3]
|
||||
D --> E[Step 4]
|
||||
E --> F[Step 5]
|
||||
F --> G[Step 6]
|
||||
G --> H[Step 7]
|
||||
H --> I[Step 8]
|
||||
I --> J[Finish]
|
||||
```
|
||||
49
worker/pdf/Dockerfile
Normal file
49
worker/pdf/Dockerfile
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
FROM node:20-bookworm-slim
|
||||
|
||||
ENV NODE_ENV=production \
|
||||
TZ=UTC \
|
||||
PUPPETEER_CACHE_DIR=/opt/puppeteer \
|
||||
PUPPETEER_SKIP_DOWNLOAD=0
|
||||
|
||||
RUN apt-get update -y && apt-get install -y --no-install-recommends \
|
||||
ca-certificates \
|
||||
dumb-init \
|
||||
qpdf \
|
||||
poppler-utils \
|
||||
libasound2 \
|
||||
libatk-bridge2.0-0 \
|
||||
libatk1.0-0 \
|
||||
libcups2 \
|
||||
libdrm2 \
|
||||
libgbm1 \
|
||||
libgtk-3-0 \
|
||||
libnss3 \
|
||||
libpango-1.0-0 \
|
||||
libpangocairo-1.0-0 \
|
||||
libx11-6 \
|
||||
libx11-xcb1 \
|
||||
libxcb1 \
|
||||
libxcomposite1 \
|
||||
libxdamage1 \
|
||||
libxext6 \
|
||||
libxfixes3 \
|
||||
libxrandr2 \
|
||||
libxrender1 \
|
||||
libxshmfence1 \
|
||||
libxkbcommon0 \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
WORKDIR /opt/forgejo-pdf
|
||||
|
||||
COPY package.json package-lock.json ./
|
||||
RUN npm ci --omit=dev
|
||||
|
||||
COPY src ./src
|
||||
COPY scripts ./scripts
|
||||
COPY assets ./assets
|
||||
|
||||
RUN node scripts/copy-assets.js \
|
||||
&& node scripts/generate-manifest.js
|
||||
|
||||
ENTRYPOINT ["dumb-init", "--"]
|
||||
CMD ["node", "src/index.js"]
|
||||
66
worker/pdf/assets/css/basic.css
Normal file
66
worker/pdf/assets/css/basic.css
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
@font-face {
|
||||
font-family: "IBM Plex Sans";
|
||||
font-style: normal;
|
||||
font-weight: 400;
|
||||
src: url("../fonts/ibm-plex-sans-latin-400-normal.woff2") format("woff2");
|
||||
font-display: swap;
|
||||
}
|
||||
|
||||
@font-face {
|
||||
font-family: "IBM Plex Mono";
|
||||
font-style: normal;
|
||||
font-weight: 400;
|
||||
src: url("../fonts/ibm-plex-mono-latin-400-normal.woff2") format("woff2");
|
||||
font-display: swap;
|
||||
}
|
||||
|
||||
@page {
|
||||
size: A4;
|
||||
margin: 20mm;
|
||||
}
|
||||
|
||||
html, body {
|
||||
font-family: "IBM Plex Sans", system-ui, -apple-system, "Segoe UI", Roboto, Arial, sans-serif;
|
||||
color: #111827;
|
||||
background: #ffffff;
|
||||
}
|
||||
|
||||
p, li, a, h1, h2, h3, h4, h5, h6 {
|
||||
overflow-wrap: anywhere;
|
||||
word-break: break-word;
|
||||
}
|
||||
|
||||
h1 { break-before: page; }
|
||||
p { orphans: 3; widows: 3; }
|
||||
pre, table, figure, blockquote { break-inside: avoid; }
|
||||
|
||||
pre, code {
|
||||
font-family: "IBM Plex Mono", ui-monospace, SFMono-Regular, Menlo, Monaco, "Liberation Mono", monospace;
|
||||
}
|
||||
|
||||
pre {
|
||||
background: #f3f4f6;
|
||||
padding: 10px 12px;
|
||||
border-radius: 6px;
|
||||
overflow-wrap: anywhere;
|
||||
white-space: pre-wrap;
|
||||
}
|
||||
|
||||
.mermaid-figure {
|
||||
margin: 12px 0;
|
||||
}
|
||||
|
||||
.mermaid-figure svg {
|
||||
max-width: 100%;
|
||||
height: auto;
|
||||
}
|
||||
|
||||
.mermaid-landscape {
|
||||
break-before: page;
|
||||
page: mermaidLandscape;
|
||||
}
|
||||
|
||||
@page mermaidLandscape {
|
||||
size: A4 landscape;
|
||||
margin: 20mm;
|
||||
}
|
||||
98
worker/pdf/assets/css/professional.css
Normal file
98
worker/pdf/assets/css/professional.css
Normal file
|
|
@ -0,0 +1,98 @@
|
|||
@font-face {
|
||||
font-family: "IBM Plex Sans";
|
||||
font-style: normal;
|
||||
font-weight: 400;
|
||||
src: url("../fonts/ibm-plex-sans-latin-400-normal.woff2") format("woff2");
|
||||
font-display: swap;
|
||||
}
|
||||
|
||||
@font-face {
|
||||
font-family: "IBM Plex Sans";
|
||||
font-style: normal;
|
||||
font-weight: 600;
|
||||
src: url("../fonts/ibm-plex-sans-latin-600-normal.woff2") format("woff2");
|
||||
font-display: swap;
|
||||
}
|
||||
|
||||
@font-face {
|
||||
font-family: "IBM Plex Mono";
|
||||
font-style: normal;
|
||||
font-weight: 400;
|
||||
src: url("../fonts/ibm-plex-mono-latin-400-normal.woff2") format("woff2");
|
||||
font-display: swap;
|
||||
}
|
||||
|
||||
@page {
|
||||
size: A4;
|
||||
margin: 18mm 18mm 22mm 18mm;
|
||||
}
|
||||
|
||||
html, body {
|
||||
font-family: "IBM Plex Sans", system-ui, -apple-system, "Segoe UI", Roboto, Arial, sans-serif;
|
||||
color: #0f172a;
|
||||
background: #ffffff;
|
||||
font-size: 11.5pt;
|
||||
line-height: 1.45;
|
||||
}
|
||||
|
||||
p, li, a, h1, h2, h3, h4, h5, h6 {
|
||||
overflow-wrap: anywhere;
|
||||
word-break: break-word;
|
||||
}
|
||||
|
||||
h1, h2, h3, h4, h5, h6 {
|
||||
font-weight: 600;
|
||||
color: #0b1220;
|
||||
}
|
||||
|
||||
h1 { break-before: page; font-size: 20pt; margin: 0 0 10pt; }
|
||||
h2 { font-size: 15pt; margin: 18pt 0 8pt; }
|
||||
h3 { font-size: 12.5pt; margin: 14pt 0 6pt; }
|
||||
|
||||
p { orphans: 3; widows: 3; margin: 0 0 10pt; }
|
||||
pre, table, figure, blockquote { break-inside: avoid; }
|
||||
|
||||
pre, code {
|
||||
font-family: "IBM Plex Mono", ui-monospace, SFMono-Regular, Menlo, Monaco, "Liberation Mono", monospace;
|
||||
}
|
||||
|
||||
pre {
|
||||
background: #f1f5f9;
|
||||
border: 1px solid #e2e8f0;
|
||||
padding: 10px 12px;
|
||||
border-radius: 8px;
|
||||
overflow-wrap: anywhere;
|
||||
white-space: pre-wrap;
|
||||
}
|
||||
|
||||
blockquote {
|
||||
margin: 12pt 0;
|
||||
padding: 0 0 0 10pt;
|
||||
border-left: 3px solid #cbd5e1;
|
||||
color: #334155;
|
||||
}
|
||||
|
||||
hr {
|
||||
border: 0;
|
||||
border-top: 1px solid #e2e8f0;
|
||||
margin: 14pt 0;
|
||||
}
|
||||
|
||||
.mermaid-figure {
|
||||
margin: 12pt 0;
|
||||
}
|
||||
|
||||
.mermaid-figure svg {
|
||||
max-width: 100%;
|
||||
height: auto;
|
||||
}
|
||||
|
||||
.mermaid-landscape {
|
||||
break-before: page;
|
||||
page: mermaidLandscape;
|
||||
}
|
||||
|
||||
@page mermaidLandscape {
|
||||
size: A4 landscape;
|
||||
margin: 20mm;
|
||||
}
|
||||
4
worker/pdf/assets/js/runtime.js
Normal file
4
worker/pdf/assets/js/runtime.js
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
// This file is intentionally tiny; the heavy dependencies are loaded as file:// assets.
|
||||
// It serves as a stable entrypoint for in-page render orchestration.
|
||||
// (The actual work is executed by src/render_pdf.js via page.evaluate().)
|
||||
|
||||
38
worker/pdf/manifest.json
Normal file
38
worker/pdf/manifest.json
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
{
|
||||
"versions": {
|
||||
"worker": "0.1.0",
|
||||
"node": "v20.19.6",
|
||||
"puppeteer": "23.11.1",
|
||||
"mermaid": "10.9.1",
|
||||
"pagedjs": "0.4.3",
|
||||
"markdownIt": "14.1.0",
|
||||
"sanitizeHtml": "2.14.0",
|
||||
"ibmPlexSans": "5.2.6",
|
||||
"ibmPlexMono": "5.2.6",
|
||||
"lockfileVersion": 3
|
||||
},
|
||||
"chromium": {
|
||||
"sha256": "8b60627021064a6a1ecc3cf7f41d528747ea7f95713ca72b369833fa89638b0a",
|
||||
"version": null
|
||||
},
|
||||
"code": {
|
||||
"src/index.js": "e0add4a9e0adba7752b6a70b41dc9592aee937045ceba4e81b4ef2c2d5d6f935",
|
||||
"src/render_pdf.js": "915fadbb9934fddf8f842d28459375e2b693eb6c646507cbeb7db05387038eb1",
|
||||
"src/validate.js": "42fcc411306036168ad6ae68626d07cb6b5175814559f4055f98fc2925bfdb55",
|
||||
"src/errors.js": "4afcb10eb0b5cfeaba69599e3d0d2d42cdc68655a93d20dad3a1508585df1529",
|
||||
"src/logger.js": "7ec580c7154036fda4fa9617442a5d9ae5e1839139010c78e555e0c37b7f11a2",
|
||||
"scripts/test-fixtures.js": "4694d98297ae73189b602c2d2d7941b8354e92e17ce4ba3c71f72db1a4408dd7",
|
||||
"scripts/copy-assets.js": "e0b9075ebef4962bc36738136d9a5f99b6b58e959234e944fd4b4f429971fbac",
|
||||
"scripts/generate-manifest.js": "711a9a1677728b44aeb80cb7f87c29ef04f1a74e9a46e1028b76878fbe25906b"
|
||||
},
|
||||
"assets": {
|
||||
"assets/js/mermaid.min.js": "61b335a46df05a7ce1c98378f60e5f3e77a7fb608a1056997e8a649304a936d6",
|
||||
"assets/js/paged.polyfill.js": "f59f361802416c770d549a647958649af2cf6601999924bc00e4f507dad5269f",
|
||||
"assets/css/basic.css": "efcf1befdb7d9708981530cc0ec685154551712dbf369f05c3dbd61627315230",
|
||||
"assets/css/professional.css": "02bd01ad088ba4d07e9a343823406e7a787222f12fbe6c528026a2f80c8106cc",
|
||||
"assets/fonts/ibm-plex-sans-latin-400-normal.woff2": "3b646991d30055a93a4ecc499713d4347953a74a947ecab435ab72070cbdab0e",
|
||||
"assets/fonts/ibm-plex-sans-latin-600-normal.woff2": "8960851d691c054ed38e259bdcf1a6190d157b4203ed5bb32c632a863fb8ec2f",
|
||||
"assets/fonts/ibm-plex-mono-latin-400-normal.woff2": "3c5a451f9ec27a354b0c2bcca636c6ec17a651281aabf29f8427e210a1d31e85"
|
||||
},
|
||||
"manifest_sha": "e0e9ebe129de8fabcf2aab0a9509f248cd907d3684542e8d42e7e45b747cc956"
|
||||
}
|
||||
2950
worker/pdf/package-lock.json
generated
Normal file
2950
worker/pdf/package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load diff
24
worker/pdf/package.json
Normal file
24
worker/pdf/package.json
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
{
|
||||
"name": "forgejo-pdf-worker",
|
||||
"version": "0.1.0",
|
||||
"private": true,
|
||||
"type": "commonjs",
|
||||
"engines": {
|
||||
"node": ">=20"
|
||||
},
|
||||
"scripts": {
|
||||
"lint": "node -c src/index.js",
|
||||
"copy-assets": "node scripts/copy-assets.js",
|
||||
"generate-manifest": "node scripts/generate-manifest.js",
|
||||
"test:fixtures": "node scripts/test-fixtures.js"
|
||||
},
|
||||
"dependencies": {
|
||||
"@fontsource/ibm-plex-mono": "5.2.6",
|
||||
"@fontsource/ibm-plex-sans": "5.2.6",
|
||||
"markdown-it": "14.1.0",
|
||||
"mermaid": "10.9.1",
|
||||
"pagedjs": "0.4.3",
|
||||
"puppeteer": "23.11.1",
|
||||
"sanitize-html": "2.14.0"
|
||||
}
|
||||
}
|
||||
43
worker/pdf/scripts/copy-assets.js
Normal file
43
worker/pdf/scripts/copy-assets.js
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
const fs = require("node:fs");
|
||||
const path = require("node:path");
|
||||
|
||||
function copyFile(src, dst) {
|
||||
fs.mkdirSync(path.dirname(dst), { recursive: true });
|
||||
fs.copyFileSync(src, dst);
|
||||
}
|
||||
|
||||
function pkgRoot(pkg) {
|
||||
try {
|
||||
return path.dirname(require.resolve(`${pkg}/package.json`));
|
||||
} catch {
|
||||
const entry = require.resolve(pkg);
|
||||
let dir = path.dirname(entry);
|
||||
for (let i = 0; i < 8; i++) {
|
||||
if (fs.existsSync(path.join(dir, "package.json"))) return dir;
|
||||
dir = path.dirname(dir);
|
||||
}
|
||||
throw new Error(`Unable to locate package root: ${pkg}`);
|
||||
}
|
||||
}
|
||||
|
||||
function main() {
|
||||
const root = path.resolve(__dirname, "..");
|
||||
const assets = path.join(root, "assets");
|
||||
|
||||
// Mermaid bundle (browser).
|
||||
// Keep it local; no external fetches at runtime.
|
||||
copyFile(path.join(pkgRoot("mermaid"), "dist/mermaid.min.js"), path.join(assets, "js/mermaid.min.js"));
|
||||
|
||||
// Paged.js bundle.
|
||||
copyFile(path.join(pkgRoot("pagedjs"), "dist/paged.polyfill.js"), path.join(assets, "js/paged.polyfill.js"));
|
||||
|
||||
// Fonts: only include latin subsets required by the default CSS.
|
||||
const sansDir = path.join(pkgRoot("@fontsource/ibm-plex-sans"), "files");
|
||||
const monoDir = path.join(pkgRoot("@fontsource/ibm-plex-mono"), "files");
|
||||
|
||||
copyFile(path.join(sansDir, "ibm-plex-sans-latin-400-normal.woff2"), path.join(assets, "fonts/ibm-plex-sans-latin-400-normal.woff2"));
|
||||
copyFile(path.join(sansDir, "ibm-plex-sans-latin-600-normal.woff2"), path.join(assets, "fonts/ibm-plex-sans-latin-600-normal.woff2"));
|
||||
copyFile(path.join(monoDir, "ibm-plex-mono-latin-400-normal.woff2"), path.join(assets, "fonts/ibm-plex-mono-latin-400-normal.woff2"));
|
||||
}
|
||||
|
||||
main();
|
||||
88
worker/pdf/scripts/generate-manifest.js
Normal file
88
worker/pdf/scripts/generate-manifest.js
Normal file
|
|
@ -0,0 +1,88 @@
|
|||
// generate-manifest.js — computes manifest.json for the worker image.
//
// The manifest pins dependency versions plus SHA-256 digests of every source
// file and vendored asset, then hashes the manifest itself (manifest_sha).
// Key insertion order below is load-bearing: JSON.stringify serializes keys
// in insertion order, so reordering any object literal changes manifest_sha.
const crypto = require("node:crypto");
const fs = require("node:fs");
const path = require("node:path");

// SHA-256 hex digest of a file's raw bytes.
function sha256File(p) {
  const h = crypto.createHash("sha256");
  h.update(fs.readFileSync(p));
  return h.digest("hex");
}

// SHA-256 hex digest of a string.
function sha256String(s) {
  return crypto.createHash("sha256").update(s).digest("hex");
}

// Best-effort JSON read; returns null on a missing or unparsable file.
function tryReadJSON(p) {
  try {
    return JSON.parse(fs.readFileSync(p, "utf8"));
  } catch {
    return null;
  }
}

function main() {
  const root = path.resolve(__dirname, "..");
  const pkg = tryReadJSON(path.join(root, "package.json")) || {};
  const lock = tryReadJSON(path.join(root, "package-lock.json")) || {};

  // Digests of the worker's own source files.
  const code = {
    "src/index.js": sha256File(path.join(root, "src/index.js")),
    "src/render_pdf.js": sha256File(path.join(root, "src/render_pdf.js")),
    "src/validate.js": sha256File(path.join(root, "src/validate.js")),
    "src/errors.js": sha256File(path.join(root, "src/errors.js")),
    "src/logger.js": sha256File(path.join(root, "src/logger.js")),
    "scripts/test-fixtures.js": sha256File(path.join(root, "scripts/test-fixtures.js")),
    "scripts/copy-assets.js": sha256File(path.join(root, "scripts/copy-assets.js")),
    "scripts/generate-manifest.js": sha256File(path.join(root, "scripts/generate-manifest.js"))
  };

  // Digests of vendored browser bundles, stylesheets, and fonts.
  const assets = {
    "assets/js/mermaid.min.js": sha256File(path.join(root, "assets/js/mermaid.min.js")),
    "assets/js/paged.polyfill.js": sha256File(path.join(root, "assets/js/paged.polyfill.js")),
    "assets/css/basic.css": sha256File(path.join(root, "assets/css/basic.css")),
    "assets/css/professional.css": sha256File(path.join(root, "assets/css/professional.css")),
    "assets/fonts/ibm-plex-sans-latin-400-normal.woff2": sha256File(path.join(root, "assets/fonts/ibm-plex-sans-latin-400-normal.woff2")),
    "assets/fonts/ibm-plex-sans-latin-600-normal.woff2": sha256File(path.join(root, "assets/fonts/ibm-plex-sans-latin-600-normal.woff2")),
    "assets/fonts/ibm-plex-mono-latin-400-normal.woff2": sha256File(path.join(root, "assets/fonts/ibm-plex-mono-latin-400-normal.woff2"))
  };

  // Chromium identity: digest the binary puppeteer will actually launch.
  // All three stay null when puppeteer (or its browser download) is absent.
  let chromePath = null;
  let chromeSha = null;
  let chromeVersion = null;
  try {
    const puppeteer = require("puppeteer");
    chromePath = puppeteer.executablePath();
    chromeSha = sha256File(chromePath);
    if (typeof puppeteer.browserVersion === "function") {
      chromeVersion = puppeteer.browserVersion();
    }
  } catch {
    // noop — manifest is still valid without chromium identity.
  }

  const deps = (pkg && pkg.dependencies) || {};
  const manifestCore = {
    versions: {
      worker: pkg.version || "0.0.0",
      node: process.version,
      puppeteer: deps.puppeteer || null,
      mermaid: deps.mermaid || null,
      pagedjs: deps.pagedjs || null,
      markdownIt: deps["markdown-it"] || null,
      sanitizeHtml: deps["sanitize-html"] || null,
      ibmPlexSans: deps["@fontsource/ibm-plex-sans"] || null,
      ibmPlexMono: deps["@fontsource/ibm-plex-mono"] || null,
      lockfileVersion: lock.lockfileVersion || null
    },
    chromium: chromePath ? { sha256: chromeSha, version: chromeVersion || null } : null,
    code,
    assets
  };

  // Hash the canonical (compact) serialization, then embed the hash in the
  // pretty-printed file. Consumers must recompute over the same compact form.
  const canonical = JSON.stringify(manifestCore);
  const manifest = { ...manifestCore, manifest_sha: sha256String(canonical) };

  fs.writeFileSync(path.join(root, "manifest.json"), JSON.stringify(manifest, null, 2));
}

main();
|
||||
133
worker/pdf/scripts/test-fixtures.js
Normal file
133
worker/pdf/scripts/test-fixtures.js
Normal file
|
|
@ -0,0 +1,133 @@
|
|||
const fs = require("node:fs");
|
||||
const path = require("node:path");
|
||||
const { spawnSync, execFileSync } = require("node:child_process");
|
||||
|
||||
function parseArgs(argv) {
|
||||
const out = { fixtures: "/fixtures", outDir: "/tmp/pdf-fixtures-out" };
|
||||
for (let i = 2; i < argv.length; i++) {
|
||||
const a = argv[i];
|
||||
if (a === "--fixtures") out.fixtures = argv[++i];
|
||||
else if (a === "--out") out.outDir = argv[++i];
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
||||
// Read the baked-in worker manifest and return its self-hash, failing loudly
// if the image was built without one.
function readManifestSHA() {
  const manifestPath = "/opt/forgejo-pdf/manifest.json";
  const manifest = JSON.parse(fs.readFileSync(manifestPath, "utf8"));
  const sha = manifest.manifest_sha;
  if (typeof sha !== "string" || !sha) {
    throw new Error("manifest.json missing manifest_sha");
  }
  return sha;
}
|
||||
|
||||
function extractKnownUniqueString(md) {
|
||||
const m = md.match(/KNOWN_UNIQUE_STRING:\s*([A-Za-z0-9_\-\.]+)/);
|
||||
return m ? m[1] : null;
|
||||
}
|
||||
|
||||
function containsMermaid(md) {
|
||||
return /```mermaid[\s\S]*?```/m.test(md);
|
||||
}
|
||||
|
||||
function run(cmd, args, opts = {}) {
|
||||
const res = spawnSync(cmd, args, { encoding: "utf8", ...opts });
|
||||
return res;
|
||||
}
|
||||
|
||||
// Fixture harness: renders every *.md fixture through the worker and asserts
// structural invariants on the logs and the produced PDF. Throws (non-zero
// exit) on the first violation. Requires qpdf and pdftotext on PATH.
function main() {
  const { fixtures, outDir } = parseArgs(process.argv);
  fs.mkdirSync(outDir, { recursive: true });

  const manifestSHA = readManifestSHA();

  // Fixed config: strict determinism + commit-time stamping, so repeated
  // runs of the same fixture are expected to be reproducible.
  const pdfConfig = {
    pdf: {
      determinism: "strict",
      timestamp: "commit_time",
      typography: "professional",
      mermaid: { strategy: "balanced", caption: false },
      orphansWidows: { enforce: true },
      footer: { enabled: true }
    }
  };

  // Sorted for a stable run order across filesystems.
  const files = fs
    .readdirSync(fixtures)
    .filter((f) => f.endsWith(".md"))
    .sort();

  if (files.length === 0) {
    throw new Error("no fixture markdown files found");
  }

  for (const f of files) {
    const mdPath = path.join(fixtures, f);
    const md = fs.readFileSync(mdPath, "utf8");
    // Every fixture must embed a marker we can later find via pdftotext.
    const expected = extractKnownUniqueString(md);
    if (!expected) throw new Error(`fixture missing KNOWN_UNIQUE_STRING: ${f}`);

    // Synthetic repo metadata: constant SHA/time keep output deterministic.
    const input = {
      markdown: md,
      repoMeta: {
        owner: "fixture",
        repo: "forgejo-pdf",
        path: f,
        repoID: 1,
        commitSHA: "0123456789abcdef0123456789abcdef01234567",
        commitTimeRFC3339: "2020-01-02T03:04:05Z"
      },
      config: pdfConfig,
      manifestSHA
    };

    const jobDir = fs.mkdtempSync(path.join(outDir, "job-"));
    const inPath = path.join(jobDir, "input.json");
    const outPath = path.join(jobDir, "output.pdf");
    fs.writeFileSync(inPath, JSON.stringify(input), "utf8");

    // Invoke the worker exactly as production does: file in, file out.
    const res = run("node", ["src/index.js", "--in", inPath, "--out", outPath], {
      cwd: "/opt/forgejo-pdf"
    });
    if (res.status !== 0) {
      throw new Error(`worker failed for ${f}: ${res.stderr.trim() || res.stdout.trim()}`);
    }

    // The worker logs one JSON object per stderr line; non-JSON noise is
    // silently dropped.
    const logs = res.stderr
      .split("\n")
      .map((l) => l.trim())
      .filter(Boolean)
      .map((l) => {
        try {
          return JSON.parse(l);
        } catch {
          return null;
        }
      })
      .filter(Boolean);

    // findLast needs Node 18+; the reverse() fallback covers older runtimes
    // (logs is not reused afterwards, so the in-place reverse is harmless).
    const done = logs.findLast ? logs.findLast((l) => l.event === "done") : logs.reverse().find((l) => l.event === "done");
    if (!done) throw new Error(`missing done log for ${f}`);
    // No-network guarantee: nothing may have tried to leave the sandbox.
    if (done.blocked_requests !== 0) throw new Error(`blocked_requests != 0 for ${f}`);

    // mermaid_count must agree with whether the fixture has a mermaid fence.
    const hasMermaid = containsMermaid(md);
    if (hasMermaid && (!Number.isFinite(done.mermaid_count) || done.mermaid_count < 1)) {
      throw new Error(`expected mermaid_count >= 1 for ${f}`);
    }
    if (!hasMermaid && done.mermaid_count !== 0) {
      throw new Error(`expected mermaid_count == 0 for ${f}`);
    }

    // Structural validity (qpdf --check exits non-zero on a broken PDF).
    execFileSync("qpdf", ["--check", outPath], { stdio: "inherit" });

    // Text extractability: whitespace is stripped on both sides because
    // extraction may split the marker across lines.
    const text = execFileSync("pdftotext", [outPath, "-"], { encoding: "utf8" });
    const normalized = text.replace(/\s+/g, "");
    const expectedNorm = expected.replace(/\s+/g, "");
    if (!normalized.includes(expectedNorm)) {
      throw new Error(`pdftotext missing expected marker for ${f}: ${expected}`);
    }
  }
}

main();
|
||||
20
worker/pdf/src/errors.js
Normal file
20
worker/pdf/src/errors.js
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
const ERROR = Object.freeze({
|
||||
BAD_INPUT: { error_id: "ERR_BAD_INPUT", message: "Invalid input." },
|
||||
BAD_CONFIG: { error_id: "ERR_BAD_CONFIG", message: "Invalid configuration." },
|
||||
NETWORK_ATTEMPT: { error_id: "ERR_NETWORK_ATTEMPT", message: "External network access is not allowed." },
|
||||
MERMAID_NO_TEXT: { error_id: "ERR_MERMAID_NO_TEXT", message: "Mermaid output must contain selectable text." },
|
||||
MERMAID_FOREIGN_OBJECT: { error_id: "ERR_MERMAID_FOREIGN_OBJECT", message: "Mermaid output contains forbidden elements." },
|
||||
TIMEOUT: { error_id: "ERR_TIMEOUT", message: "Render timed out." },
|
||||
INTERNAL: { error_id: "ERR_INTERNAL", message: "Render failed." }
|
||||
});
|
||||
|
||||
function asFailure(errorDef, detailsCode) {
|
||||
return {
|
||||
error_id: errorDef.error_id,
|
||||
message: errorDef.message,
|
||||
...(detailsCode ? { details_code: detailsCode } : {})
|
||||
};
|
||||
}
|
||||
|
||||
module.exports = { ERROR, asFailure };
|
||||
|
||||
73
worker/pdf/src/index.js
Normal file
73
worker/pdf/src/index.js
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
// index.js — worker entry point.
//
// Usage: node src/index.js --in <input.json> --out <output.pdf>
// Reads a JSON job description, validates it, renders the PDF, and emits
// structured JSON log lines on stderr (see logger.js). Exit codes:
//   0 success · 1 render failure · 2 bad invocation/input.
const fs = require("node:fs");
const path = require("node:path");

const { log } = require("./logger");
const { ERROR, asFailure } = require("./errors");
const { validateInput } = require("./validate");
const { renderToPDF } = require("./render_pdf");

// Extract --in/--out paths from argv; missing flags stay null.
function parseArgs(argv) {
  const args = { inPath: null, outPath: null };
  for (let i = 2; i < argv.length; i++) {
    const a = argv[i];
    if (a === "--in") args.inPath = argv[++i];
    else if (a === "--out") args.outPath = argv[++i];
  }
  return args;
}

async function main() {
  const { inPath, outPath } = parseArgs(process.argv);
  if (!inPath || !outPath) {
    log(asFailure(ERROR.BAD_INPUT, "missing_cli_args"));
    process.exitCode = 2;
    return;
  }

  // Read → parse → validate, each with its own details_code so failures are
  // distinguishable in logs without leaking job content.
  let inputRaw;
  try {
    inputRaw = fs.readFileSync(inPath, "utf8");
  } catch {
    log(asFailure(ERROR.BAD_INPUT, "read_input_failed"));
    process.exitCode = 2;
    return;
  }

  let input;
  try {
    input = JSON.parse(inputRaw);
  } catch {
    log(asFailure(ERROR.BAD_INPUT, "parse_input_failed"));
    process.exitCode = 2;
    return;
  }

  const bad = validateInput(input);
  if (bad) {
    log(asFailure(bad, "validate_input_failed"));
    process.exitCode = 2;
    return;
  }

  const started = process.hrtime.bigint();
  log({ event: "start", repo: `${input.repoMeta.owner}/${input.repoMeta.repo}`, path: input.repoMeta.path });

  try {
    const { pdf, blockedRequests, mermaidCount } = await renderToPDF(input);
    fs.mkdirSync(path.dirname(outPath), { recursive: true });
    fs.writeFileSync(outPath, pdf);
    const elapsedMs = Number((process.hrtime.bigint() - started) / 1000000n);
    // The "done" record is consumed by scripts/test-fixtures.js.
    log({ event: "done", bytes: pdf.length, ms: elapsedMs, blocked_requests: blockedRequests, mermaid_count: mermaidCount });
  } catch (e) {
    // Prefer the sanitized payload attached by render_pdf.js (safeError);
    // anything else becomes a generic internal failure.
    const safe = e && typeof e === "object" && e.safeError ? e.safeError : null;
    if (safe) {
      log(safe);
      process.exitCode = 1;
      return;
    }
    log(asFailure(ERROR.INTERNAL, "unhandled_exception"));
    process.exitCode = 1;
  }
}

main();
|
||||
6
worker/pdf/src/logger.js
Normal file
6
worker/pdf/src/logger.js
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
function log(obj) {
|
||||
process.stderr.write(`${JSON.stringify(obj)}\n`);
|
||||
}
|
||||
|
||||
module.exports = { log };
|
||||
|
||||
405
worker/pdf/src/render_pdf.js
Normal file
405
worker/pdf/src/render_pdf.js
Normal file
|
|
@ -0,0 +1,405 @@
|
|||
const crypto = require("node:crypto");
|
||||
const fs = require("node:fs");
|
||||
const path = require("node:path");
|
||||
|
||||
const sanitizeHtml = require("sanitize-html");
|
||||
const MarkdownIt = require("markdown-it");
|
||||
const puppeteer = require("puppeteer");
|
||||
|
||||
const { ERROR, asFailure } = require("./errors");
|
||||
|
||||
// Throw an Error carrying a user-safe failure payload; index.js logs
// err.safeError verbatim instead of leaking internal details.
function safeThrow(errorDef, detailsCode) {
  throw Object.assign(new Error(errorDef.error_id), {
    safeError: asFailure(errorDef, detailsCode)
  });
}
|
||||
|
||||
function sha256Hex(text) {
|
||||
return crypto.createHash("sha256").update(text).digest("hex");
|
||||
}
|
||||
|
||||
function normalizeMarkdown(md) {
|
||||
return md.replace(/\r\n/g, "\n").replace(/\r/g, "\n");
|
||||
}
|
||||
|
||||
// Convert untrusted Markdown to sanitized HTML.
//
// v0.1 policy: raw HTML in the markdown is disabled, images are stripped, and
// anchors may only keep same-document/relative hrefs — anything that could
// trigger an external fetch or script execution loses its href (the link text
// is preserved).
//
// Fix over the original deny-list: protocol-relative URLs ("//evil.example")
// slipped through the scheme checks, and "vbscript:" was missing; both are now
// blocked.
function markdownToHTML(markdown) {
  const md = new MarkdownIt({
    html: false, // never pass raw HTML through markdown-it
    linkify: true
  });

  const html = md.render(markdown);

  // Href prefixes that must never survive sanitization: network fetches,
  // script execution, and data smuggling. "//" covers protocol-relative URLs.
  const forbiddenHrefPrefixes = [
    "http:",
    "https:",
    "ws:",
    "wss:",
    "mailto:",
    "javascript:",
    "vbscript:",
    "data:",
    "file:",
    "//"
  ];

  // Sanitize and remove images/external resources for v0.1.
  const cleaned = sanitizeHtml(html, {
    allowedTags: sanitizeHtml.defaults.allowedTags.filter((t) => t !== "img"),
    allowedAttributes: {
      a: ["href", "title"],
      code: ["class"],
      pre: ["class"],
      span: ["class"],
      p: ["class"],
      h1: ["id"],
      h2: ["id"],
      h3: ["id"],
      h4: ["id"],
      h5: ["id"],
      h6: ["id"]
    },
    allowVulnerableTags: false,
    transformTags: {
      a: (tagName, attribs) => {
        const href = attribs.href || "";
        const lower = href.trim().toLowerCase();
        if (forbiddenHrefPrefixes.some((p) => lower.startsWith(p))) {
          // Drop the dangerous target but keep the anchor (and its text).
          return { tagName: "a", attribs: { rel: "nofollow" } };
        }
        return { tagName, attribs: { href, rel: "nofollow" } };
      }
    }
  });

  return cleaned;
}
|
||||
|
||||
function htmlTemplate({ bodyHTML, assetsDir, cssName, footerCSS }) {
|
||||
const cssPath = `file://${path.join(assetsDir, "css", cssName)}`;
|
||||
const runtimePath = `file://${path.join(assetsDir, "js", "runtime.js")}`;
|
||||
const mermaidPath = `file://${path.join(assetsDir, "js", "mermaid.min.js")}`;
|
||||
const pagedPath = `file://${path.join(assetsDir, "js", "paged.polyfill.js")}`;
|
||||
|
||||
return `<!doctype html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="utf-8"/>
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1"/>
|
||||
<link rel="stylesheet" href="${cssPath}"/>
|
||||
<style>${footerCSS}</style>
|
||||
</head>
|
||||
<body>
|
||||
<main id="content">${bodyHTML}</main>
|
||||
<script src="${mermaidPath}"></script>
|
||||
<script>window.PagedConfig = { auto: false };</script>
|
||||
<script src="${pagedPath}"></script>
|
||||
<script src="${runtimePath}"></script>
|
||||
</body>
|
||||
</html>`;
|
||||
}
|
||||
|
||||
// Build the @page footer CSS: repo slug bottom-left, short commit SHA plus
// timestamp bottom-center, page counter bottom-right. Returns "" when the
// footer is disabled in config.
function footerCSSFor(input) {
  const pdf = input.config.pdf;
  if (!pdf.footer.enabled) return "";

  const { owner, repo, commitSHA, commitTimeRFC3339 } = input.repoMeta;
  const shortSHA = commitSHA.slice(0, 12);

  // "commit_time" pins the footer to the commit for reproducible output;
  // anything else stamps the wall clock at render time.
  const date = pdf.timestamp === "commit_time" ? commitTimeRFC3339 : new Date().toISOString();

  const left = cssString(`${owner}/${repo}`);
  const center = cssString(`${shortSHA} • ${date}`);

  return `
@page {
  @bottom-left { content: "${left}"; }
  @bottom-center { content: "${center}"; }
  @bottom-right { content: counter(page); }
}`;
}
|
||||
|
||||
function cssString(s) {
|
||||
return String(s).replace(/\\/g, "\\\\").replace(/"/g, '\\"');
|
||||
}
|
||||
|
||||
// Render a validated job to a PDF buffer inside a throwaway Chromium profile.
//
// Pipeline: markdown → sanitized HTML → static file:// page → in-page Mermaid
// rendering + Paged.js pagination → page.pdf(). All assets are local; any
// non-file:// request is counted, aborted, and surfaced as
// ERR_NETWORK_ATTEMPT.
//
// Returns { pdf: Buffer, blockedRequests: number, mermaidCount: number }.
// Throws Errors carrying .safeError (see safeThrow) on any failure.
async function renderToPDF(input) {
  const assetsDir = "/opt/forgejo-pdf/assets";
  const pdf = input.config.pdf;
  const cssName = pdf.typography === "basic" ? "basic.css" : "professional.css";

  const bodyHTML = markdownToHTML(normalizeMarkdown(input.markdown));
  const html = htmlTemplate({ bodyHTML, assetsDir, cssName, footerCSS: footerCSSFor(input) });

  // Fresh profile + page dir per job; both removed in the finally block.
  const userDataDir = fs.mkdtempSync("/tmp/chrome-profile-");
  const pageDir = fs.mkdtempSync("/tmp/pdf-page-");
  const htmlPath = path.join(pageDir, "index.html");
  fs.writeFileSync(htmlPath, html, "utf8");

  const browser = await puppeteer.launch({
    headless: "new",
    args: [
      "--no-sandbox",
      "--disable-dev-shm-usage",
      "--allow-file-access-from-files",
      `--user-data-dir=${userDataDir}`
    ]
  });

  // FIX: hoisted out of the try block. Previously this was declared with
  // `let` inside the try, so the catch handler's `typeof lastPageError`
  // always saw an out-of-scope name and the diagnostic hint was dropped.
  let lastPageError = null;

  try {
    const page = await browser.newPage();
    // Remember the most recent page-side error to enrich failure details.
    page.on("pageerror", (err) => {
      lastPageError = err && typeof err.message === "string" ? err.message : String(err);
    });
    page.on("console", (msg) => {
      if (msg.type && msg.type() === "error") {
        lastPageError = msg.text ? msg.text() : String(msg);
      }
    });
    // Enforce the no-network contract: only file:// requests may proceed.
    await page.setRequestInterception(true);
    let blockedRequests = 0;
    let blockedURL = null;
    page.on("request", (req) => {
      const u = req.url();
      if (u.startsWith("file:")) return req.continue();
      blockedRequests++;
      if (!blockedURL) blockedURL = u;
      return req.abort();
    });

    await page.goto(`file://${htmlPath}`, { waitUntil: "load" });
    if (blockedRequests > 0) safeThrow(ERROR.NETWORK_ATTEMPT, `blocked:${blockedURL || "non_file_request"}`);

    // Mermaid + Paged.js rendering happens in-page.
    const renderResult = await page.evaluate(async (payload) => {
      try {
        // Minimal SHA-256 (hex) implementation for deterministic IDs.
        // Avoids relying on SubtleCrypto availability for file:// contexts.
        function sha256HexLocal(ascii) {
          function rightRotate(value, amount) {
            return (value >>> amount) | (value << (32 - amount));
          }
          // Normalize to a byte-string (UTF-8) for consistent hashing across unicode input.
          ascii = unescape(encodeURIComponent(ascii));
          const maxWord = Math.pow(2, 32);
          let result = "";

          const words = [];
          const asciiBitLength = ascii.length * 8;

          let hash = sha256HexLocal.h || [];
          let k = sha256HexLocal.k || [];

          let primeCounter = k.length;
          const isComposite = {};
          for (let candidate = 2; primeCounter < 64; candidate++) {
            if (!isComposite[candidate]) {
              for (let i = 0; i < 313; i += candidate) isComposite[i] = candidate;
              hash[primeCounter] = (Math.pow(candidate, 0.5) * maxWord) | 0;
              k[primeCounter++] = (Math.pow(candidate, 1 / 3) * maxWord) | 0;
            }
          }
          sha256HexLocal.h = hash;
          sha256HexLocal.k = k;

          ascii += "\x80";
          while ((ascii.length % 64) - 56) ascii += "\x00";
          for (let i = 0; i < ascii.length; i++) {
            const j = ascii.charCodeAt(i);
            words[i >> 2] |= j << ((3 - i) % 4) * 8;
          }
          words[words.length] = (asciiBitLength / maxWord) | 0;
          words[words.length] = asciiBitLength;

          for (let j = 0; j < words.length; ) {
            const w = words.slice(j, (j += 16));
            const oldHash = hash.slice(0);

            for (let i = 0; i < 64; i++) {
              const w15 = w[i - 15];
              const w2 = w[i - 2];

              const a = hash[0];
              const e = hash[4];
              const temp1 =
                hash[7] +
                (rightRotate(e, 6) ^ rightRotate(e, 11) ^ rightRotate(e, 25)) +
                ((e & hash[5]) ^ (~e & hash[6])) +
                k[i] +
                (w[i] =
                  i < 16
                    ? w[i]
                    : (w[i - 16] +
                        (rightRotate(w15, 7) ^ rightRotate(w15, 18) ^ (w15 >>> 3)) +
                        w[i - 7] +
                        (rightRotate(w2, 17) ^ rightRotate(w2, 19) ^ (w2 >>> 10))) |
                      0);
              const temp2 =
                (rightRotate(a, 2) ^ rightRotate(a, 13) ^ rightRotate(a, 22)) +
                ((a & hash[1]) ^ (a & hash[2]) ^ (hash[1] & hash[2]));

              hash = [(temp1 + temp2) | 0].concat(hash);
              hash[4] = (hash[4] + temp1) | 0;
              hash.pop();
            }

            for (let i = 0; i < 8; i++) {
              hash[i] = (hash[i] + oldHash[i]) | 0;
            }
          }

          for (let i = 0; i < 8; i++) {
            for (let j = 3; j + 1; j--) {
              const b = (hash[i] >> (j * 8)) & 255;
              result += (b < 16 ? "0" : "") + b.toString(16);
            }
          }
          return result;
        }

        // Reject/strip unsafe SVG content Mermaid may emit; require real
        // <text> nodes so the PDF keeps selectable diagram text.
        function sanitizeSVG(svgText) {
          const parser = new DOMParser();
          const doc = parser.parseFromString(svgText, "image/svg+xml");
          const svg = doc.documentElement;

          const forbidden = svg.querySelector("foreignObject");
          if (forbidden) throw { error_id: "ERR_MERMAID_FOREIGN_OBJECT", message: "Mermaid output contains forbidden elements." };

          const scripts = svg.querySelectorAll("script");
          for (const s of scripts) s.remove();

          // Strip event handlers and any non-fragment href targets.
          for (const el of svg.querySelectorAll("*")) {
            for (const attr of Array.from(el.attributes)) {
              const name = attr.name;
              const value = attr.value || "";
              if (name.startsWith("on")) el.removeAttribute(name);
              if ((name === "href" || name === "xlink:href") && !value.startsWith("#")) el.removeAttribute(name);
            }
          }

          const hasText = svg.querySelector("text, tspan");
          if (!hasText) throw { error_id: "ERR_MERMAID_NO_TEXT", message: "Mermaid output must contain selectable text." };

          return svg.outerHTML;
        }

        // Wide diagrams (viewBox aspect > 1.15) go onto a landscape page.
        function shouldLandscape(svgEl) {
          const vb = svgEl.getAttribute("viewBox");
          if (!vb) return false;
          const parts = vb.trim().split(/\s+/).map(Number);
          if (parts.length !== 4 || parts.some((n) => !Number.isFinite(n))) return false;
          const w = parts[2];
          const h = parts[3];
          if (h <= 0) return false;
          const aspect = w / h;
          return aspect > 1.15;
        }

        // Determinism guards.
        Math.random = () => 0.5;
        if (payload.pdf.determinism === "strict") {
          const fixed = Date.parse(payload.repoMeta.commitTimeRFC3339);
          if (Number.isFinite(fixed)) Date.now = () => fixed;
        }

        // Render Mermaid fences found in code blocks.
        const blocks = Array.from(document.querySelectorAll("pre > code"));
        const mermaidBlocks = blocks.filter((c) => (c.className || "").includes("language-mermaid"));

        if (typeof mermaid === "undefined" || !mermaid) throw { error_id: "ERR_INTERNAL", message: "Render failed." };
        mermaid.initialize({
          startOnLoad: false,
          securityLevel: "strict",
          htmlLabels: false,
          flowchart: { htmlLabels: false },
          sequence: { htmlLabels: false },
          state: { htmlLabels: false },
          class: { htmlLabels: false },
          fontFamily: "IBM Plex Sans",
          theme: "base",
          themeVariables: {
            fontFamily: "IBM Plex Sans",
            primaryColor: "#ffffff",
            primaryTextColor: "#111827",
            lineColor: "#6b7280",
            secondaryColor: "#f3f4f6",
            tertiaryColor: "#ffffff"
          }
        });

        for (let i = 0; i < mermaidBlocks.length; i++) {
          const codeEl = mermaidBlocks[i];
          const diagramText = codeEl.textContent || "";
          // Deterministic element ID derived from commit, path, index, text.
          const id = `m-${sha256HexLocal(payload.repoMeta.commitSHA + "|" + payload.repoMeta.path + "|" + i + "|" + diagramText)}`;

          const { svg } = await mermaid.render(id, diagramText);
          const clean = sanitizeSVG(svg);

          const container = document.createElement("figure");
          container.className = "mermaid-figure";
          container.innerHTML = clean;

          const svgEl = container.querySelector("svg");
          if (svgEl && shouldLandscape(svgEl)) {
            container.classList.add("mermaid-landscape");
          }

          const pre = codeEl.parentElement;
          pre.replaceWith(container);
        }

        // Paginate with Paged.js (auto-run was disabled in the template).
        if (window.PagedPolyfill && window.PagedPolyfill.preview) {
          await window.PagedPolyfill.preview();
        }

        return { ok: true, mermaid_count: mermaidBlocks.length };
      } catch (err) {
        // Only pass through pre-sanitized {error_id, message} shapes.
        if (err && typeof err === "object" && err.error_id && err.message) {
          return { ok: false, error_id: err.error_id, message: err.message };
        }
        return { ok: false, error_id: "ERR_INTERNAL", message: "Render failed." };
      }
    }, { pdf: input.config.pdf, repoMeta: input.repoMeta });

    if (!renderResult || renderResult.ok !== true || typeof renderResult.mermaid_count !== "number") {
      if (renderResult && renderResult.ok === false && renderResult.error_id && renderResult.message) {
        const err = new Error(renderResult.error_id);
        err.safeError = { error_id: renderResult.error_id, message: renderResult.message };
        throw err;
      }
      safeThrow(ERROR.INTERNAL, "render_result_invalid");
    }
    const mermaidCount = renderResult.mermaid_count;

    const pdfBuf = await page.pdf({
      format: "A4",
      printBackground: true,
      preferCSSPageSize: true
    });

    return { pdf: Buffer.from(pdfBuf), blockedRequests, mermaidCount };
  } catch (e) {
    // Re-wrap any {error_id, message} shape; pass through safeError as-is.
    if (e && typeof e === "object" && e.error_id && e.message) {
      const err = new Error(e.error_id);
      err.safeError = { error_id: e.error_id, message: e.message };
      throw err;
    }
    if (e && typeof e === "object" && e.safeError) throw e;
    // Generic failure: attach a heavily-sanitized hint (exception message,
    // falling back to the last page-side error captured above).
    const msg = e && typeof e === "object" && typeof e.message === "string" ? e.message : "";
    const hint = msg || (typeof lastPageError === "string" ? lastPageError : "");
    const details = hint
      ? `render_failed:${hint.slice(0, 160).replace(/[^A-Za-z0-9_.:/\\-]+/g, "_")}`
      : "render_failed";
    safeThrow(ERROR.INTERNAL, details);
  } finally {
    try {
      await browser.close();
    } catch {}
    try {
      fs.rmSync(userDataDir, { recursive: true, force: true });
      fs.rmSync(pageDir, { recursive: true, force: true });
    } catch {}
  }
}

module.exports = { renderToPDF };
|
||||
37
worker/pdf/src/validate.js
Normal file
37
worker/pdf/src/validate.js
Normal file
|
|
@ -0,0 +1,37 @@
|
|||
const { ERROR } = require("./errors");
|
||||
|
||||
// isObject reports whether value is a plain object: non-null, typeof
// "object", and not an array.
//
// Fix: the original expression `value && typeof value === "object" && ...`
// short-circuits and returns the falsy input itself (e.g. null, 0, "")
// rather than false. Every call site only negates the result, so coercing
// to a strict boolean is backward-compatible while making the return type
// honest for any future direct comparison (=== false).
function isObject(value) {
  return Boolean(value) && typeof value === "object" && !Array.isArray(value);
}
|
||||
|
||||
// validateInput structurally validates the worker's input envelope.
//
// Returns the first matching error constant — ERROR.BAD_INPUT for a
// malformed envelope/repoMeta, ERROR.BAD_CONFIG for an invalid pdf config
// block — or null when the input is acceptable.
//
// Check order is significant and intentionally preserved: top-level shape
// first, then the pdf config block, then repoMeta fields. A given invalid
// input therefore yields the same error code as before.
function validateInput(input) {
  // Envelope shape: all of these must exist before config can be inspected.
  if (!isObject(input)) return ERROR.BAD_INPUT;
  if (typeof input.markdown !== "string") return ERROR.BAD_INPUT;
  if (!isObject(input.repoMeta)) return ERROR.BAD_INPUT;
  if (!isObject(input.config) || !isObject(input.config.pdf)) return ERROR.BAD_INPUT;
  if (typeof input.manifestSHA !== "string") return ERROR.BAD_INPUT;

  const pdf = input.config.pdf;

  // Enumerated settings: each key must hold exactly one of the listed
  // values. Insertion order of the table matches the original check order.
  const enums = {
    determinism: ["strict", "relaxed"],
    timestamp: ["commit_time", "render_time"],
    typography: ["basic", "professional"],
  };
  for (const key of Object.keys(enums)) {
    if (!enums[key].includes(pdf[key])) return ERROR.BAD_CONFIG;
  }

  // Mermaid sub-config: object with a known strategy and a boolean caption.
  const mermaid = pdf.mermaid;
  if (!isObject(mermaid)) return ERROR.BAD_CONFIG;
  if (!["fast", "balanced", "prestige"].includes(mermaid.strategy)) return ERROR.BAD_CONFIG;
  if (typeof mermaid.caption !== "boolean") return ERROR.BAD_CONFIG;

  // Boolean toggles nested one level deep.
  if (!isObject(pdf.orphansWidows) || typeof pdf.orphansWidows.enforce !== "boolean") return ERROR.BAD_CONFIG;
  if (!isObject(pdf.footer) || typeof pdf.footer.enabled !== "boolean") return ERROR.BAD_CONFIG;

  // Cross-field rule: strict determinism forbids render-time timestamps.
  if (pdf.determinism === "strict" && pdf.timestamp !== "commit_time") return ERROR.BAD_CONFIG;

  // repoMeta: every field is a required string.
  const rm = input.repoMeta;
  for (const field of ["owner", "repo", "path", "commitSHA", "commitTimeRFC3339"]) {
    if (typeof rm[field] !== "string") return ERROR.BAD_INPUT;
  }

  return null;
}
|
||||
|
||||
// Public surface of this module; consumed by the worker entrypoint.
module.exports = { validateInput };
|
||||
|
||||
Loading…
Add table
Reference in a new issue