forgejo-pdf/services/pdfexport/service.go
codex 1ce1370983
Some checks failed
pdfexport / pdfexport-worker-fixtures (push) Has been cancelled
Add server-side Markdown→PDF export (v0.1)
2025-12-16 17:52:53 +00:00

235 lines
6.8 KiB
Go

// SPDX-License-Identifier: MIT
package pdfexport
import (
"crypto/sha256"
"encoding/hex"
"encoding/json"
"fmt"
"io"
"net/http"
"os"
"path"
"path/filepath"
"strings"
"time"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/markup"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/services/context"
)
const (
// maxMarkdownBytes is the largest Markdown blob, in bytes, that ServeMarkdownPDF
// will read for export (4 << 20 = 4 MiB); larger files are rejected.
maxMarkdownBytes = 4 << 20
// ManifestSHA must be kept in sync with worker/pdf/manifest.json for cache key stability.
// It is used as part of the worker input and cache key (v0.1 determinism requirement).
ManifestSHA = "e0e9ebe129de8fabcf2aab0a9509f248cd907d3684542e8d42e7e45b747cc956"
)
// ServeMarkdownPDF handles GET /{owner}/{repo}/raw/{path}?ref=<sha>&format=pdf.
//
// Requests whose "format" query parameter is not "pdf" (compared
// case-insensitively) receive a bare 404. For all other requests the response
// is either the PDF bytes (Content-Type: application/pdf) or a JSON error
// envelope produced by writeUserError.
//
// The X-Forgejo-PDF-Cache response header is set to HIT when the PDF came from
// the on-disk cache and MISS when it was freshly generated.
func ServeMarkdownPDF(ctx *context.Context) {
	if !strings.EqualFold(ctx.FormString("format"), "pdf") {
		ctx.Status(http.StatusNotFound)
		return
	}

	// Determine effective file path and commit.
	effectivePath, commit, ue := resolvePathAndCommit(ctx)
	if ue.ErrorID != "" {
		writeUserError(ctx, ue)
		return
	}
	if !markup.IsMarkupFile(path.Base(effectivePath), "markdown") {
		writeUserError(ctx, errBadRequest("ERR_PDF_NOT_MARKDOWN", "PDF export is only supported for Markdown files."))
		return
	}

	entry, err := commit.GetTreeEntryByPath(effectivePath)
	if err != nil {
		if git.IsErrNotExist(err) {
			writeUserError(ctx, errNotFound("ERR_PDF_NOT_FOUND", "File not found."))
			return
		}
		log.Error("pdfexport: GetTreeEntryByPath: %v", err)
		writeUserError(ctx, errInternal("ERR_PDF_INTERNAL", "Failed to export PDF."))
		return
	}
	// A Markdown-looking name can still resolve to a directory or submodule.
	if entry.IsDir() || entry.IsSubModule() {
		writeUserError(ctx, errBadRequest("ERR_PDF_NOT_MARKDOWN", "PDF export is only supported for Markdown files."))
		return
	}

	blob := entry.Blob()
	rc, err := blob.DataAsync()
	if err != nil {
		log.Error("pdfexport: blob.DataAsync: %v", err)
		writeUserError(ctx, errInternal("ERR_PDF_INTERNAL", "Failed to export PDF."))
		return
	}
	defer func() { _ = rc.Close() }()

	md, ue := readBounded(rc, maxMarkdownBytes)
	if ue.ErrorID != "" {
		writeUserError(ctx, ue)
		return
	}

	cfg, ue := configFromSetting()
	if ue.ErrorID != "" {
		writeUserError(ctx, ue)
		return
	}
	cfgHash, err := configHash(cfg)
	if err != nil {
		log.Error("pdfexport: config hash: %v", err)
		writeUserError(ctx, errInternal("ERR_PDF_INTERNAL", "Failed to export PDF."))
		return
	}

	// The cache key covers every input that is passed to the worker except the
	// Markdown bytes themselves, which are identified by commit ID and path.
	cacheKey := fmt.Sprintf("%d|%s|%s|%s|%s", ctx.Repo.Repository.ID, commit.ID.String(), effectivePath, cfgHash, ManifestSHA)
	pdf, cacheHit, ue := getOrGeneratePDF(cacheKey, workerInput{
		Markdown: string(md),
		RepoMeta: workerRepoMeta{
			Owner:             ctx.Repo.Repository.OwnerName,
			Repo:              ctx.Repo.Repository.Name,
			Path:              effectivePath,
			RepoID:            ctx.Repo.Repository.ID,
			CommitSHA:         commit.ID.String(),
			CommitTimeRFC3339: commit.Committer.When.UTC().Format(time.RFC3339),
		},
		Config:      workerConfigWrap{PDF: cfg},
		ManifestSHA: ManifestSHA,
	})
	if ue.ErrorID != "" {
		writeUserError(ctx, ue)
		return
	}

	// Name the download after the source file with its extension replaced by
	// ".pdf". The extension is taken from the name itself rather than assumed
	// to be ".md", because the Markdown check above is delegated to
	// markup.IsMarkupFile and is not limited to that one suffix here.
	filename := path.Base(effectivePath)
	filename = strings.TrimSuffix(filename, path.Ext(filename))
	if filename == "" {
		// e.g. a file literally named ".md".
		filename = "document"
	}
	filename += ".pdf"

	header := ctx.Resp.Header()
	header.Set("Content-Type", "application/pdf")
	header.Set("X-Content-Type-Options", "nosniff")
	header.Set("Content-Disposition", fmt.Sprintf("inline; filename=%q", filename))
	if cacheHit {
		header.Set("X-Forgejo-PDF-Cache", "HIT")
	} else {
		header.Set("X-Forgejo-PDF-Cache", "MISS")
	}
	ctx.Resp.WriteHeader(http.StatusOK)
	// The status line is already sent; a failed body write cannot be reported
	// to the client, so the error is intentionally dropped.
	_, _ = ctx.Resp.Write(pdf)
}
// resolvePathAndCommit works out which file path and commit an export request
// targets.
//
// The commit is the one named by the "ref" query parameter when that parameter
// is non-empty after trimming; otherwise it is the commit already present on
// ctx.Repo. The path is ctx.Repo.TreePath, except for the legacy /raw/* route
// with an explicit ref, where the wildcard route parameter is used verbatim as
// the file path (avoiding legacy ref guessing so cache keys stay deterministic).
//
// On failure the returned userError carries a non-empty ErrorID and the path
// and commit are zero values.
func resolvePathAndCommit(ctx *context.Context) (string, *git.Commit, userError) {
	if ctx.Repo.Repository == nil || ctx.Repo.GitRepo == nil {
		return "", nil, errInternal("ERR_PDF_INTERNAL", "Failed to export PDF.")
	}

	ref := strings.TrimSpace(ctx.FormString("ref"))
	explicitRef := ref != ""

	var commit *git.Commit
	if explicitRef {
		resolved, err := ctx.Repo.GitRepo.GetCommit(ref)
		switch {
		case err == nil:
			commit = resolved
		case git.IsErrNotExist(err):
			return "", nil, errNotFound("ERR_PDF_REF_NOT_FOUND", "Commit not found.")
		default:
			log.Error("pdfexport: GetCommit(ref): %v", err)
			return "", nil, errInternal("ERR_PDF_INTERNAL", "Failed to export PDF.")
		}
	} else {
		commit = ctx.Repo.Commit
	}
	if commit == nil {
		return "", nil, errInternal("ERR_PDF_INTERNAL", "Failed to export PDF.")
	}

	treePath := ctx.Repo.TreePath
	if explicitRef && isRawLegacyPath(ctx) {
		// Legacy route: everything matched by the wildcard is the file path.
		treePath = strings.TrimPrefix(ctx.Params("*"), "/")
	}
	if treePath == "" {
		return "", nil, errBadRequest("ERR_PDF_BAD_REQUEST", "Invalid request.")
	}
	return treePath, commit, userError{}
}
// isRawLegacyPath reports whether the request URL path uses the legacy
// /raw/<path> form, i.e. it contains "/raw/" and what follows does not start
// with one of the typed prefixes "branch/", "tag/", "commit/" or "blob/".
//
// NOTE(review): the first occurrence of "/raw/" in the path is used, so a path
// segment literally named "raw" ahead of the route segment would shift the
// match; confirm against the router's reserved names.
func isRawLegacyPath(ctx *context.Context) bool {
	_, rest, found := strings.Cut(ctx.Req.URL.Path, "/raw/")
	if !found {
		return false
	}
	for _, typed := range []string{"branch/", "tag/", "commit/", "blob/"} {
		if strings.HasPrefix(rest, typed) {
			return false
		}
	}
	return true
}
// readBounded reads r to EOF and returns its contents, refusing input longer
// than limit bytes.
//
// It returns an ERR_PDF_TOO_LARGE bad-request error when more than limit bytes
// are available, and an ERR_PDF_INTERNAL error (after logging the cause) when
// the underlying read fails. On success the returned userError is the zero
// value.
func readBounded(r io.Reader, limit int64) ([]byte, userError) {
	// Allow one byte beyond the limit so that "exactly limit" and "over limit"
	// can be told apart without reading the whole oversized input.
	data, err := io.ReadAll(io.LimitReader(r, limit+1))
	if err != nil {
		// The caller only sees a generic message, so record the cause here,
		// matching the other internal-failure paths in this package.
		log.Error("pdfexport: readBounded: %v", err)
		return nil, errInternal("ERR_PDF_INTERNAL", "Failed to export PDF.")
	}
	if int64(len(data)) > limit {
		return nil, errBadRequest("ERR_PDF_TOO_LARGE", "File is too large to export.")
	}
	return data, userError{}
}
// getOrGeneratePDF returns the PDF for cacheKey, serving it from the on-disk
// cache when an entry exists and otherwise running the worker on input and
// caching the result.
//
// Cache entries live under <AppDataPath>/pdfexport/cache and are named after
// the SHA-256 of cacheKey. The boolean result is true when the bytes came from
// the cache. Failure to create the cache directory is reported as
// ERR_PDF_INTERNAL; failure to store a freshly generated PDF is only logged,
// because the generated bytes are still valid to return.
func getOrGeneratePDF(cacheKey string, input workerInput) ([]byte, bool, userError) {
	cacheDir := filepath.Join(setting.AppDataPath, "pdfexport", "cache")
	if err := os.MkdirAll(cacheDir, 0o750); err != nil {
		log.Error("pdfexport: mkdir cache: %v", err)
		return nil, false, errInternal("ERR_PDF_INTERNAL", "Failed to export PDF.")
	}

	cacheFile := filepath.Join(cacheDir, sha256Hex([]byte(cacheKey))+".pdf")
	// A missing, unreadable or empty entry is treated as a cache miss.
	if b, err := os.ReadFile(cacheFile); err == nil && len(b) > 0 {
		return b, true, userError{}
	}

	pdf, ue := runWorker(input)
	if ue.ErrorID != "" {
		return nil, false, ue
	}

	if err := storeCachedPDF(cacheDir, cacheFile, pdf); err != nil {
		log.Error("pdfexport: write cache: %v", err)
	}
	return pdf, false, userError{}
}

// storeCachedPDF publishes data as cacheFile by writing it to a uniquely named
// temporary file in cacheDir and renaming that file into place.
//
// A unique temporary name keeps concurrent requests for the same key from
// writing over each other's in-progress file. The temporary file is removed
// whenever publishing fails.
func storeCachedPDF(cacheDir, cacheFile string, data []byte) (err error) {
	tmp, err := os.CreateTemp(cacheDir, filepath.Base(cacheFile)+".*.tmp")
	if err != nil {
		return err
	}
	tmpName := tmp.Name()
	defer func() {
		if err != nil {
			// Best-effort cleanup; the original error is what matters.
			_ = os.Remove(tmpName)
		}
	}()

	if _, err = tmp.Write(data); err != nil {
		_ = tmp.Close()
		return err
	}
	// os.CreateTemp creates the file with mode 0600; apply the cache's
	// intended 0640 before the file becomes visible under its final name.
	if err = tmp.Chmod(0o640); err != nil {
		_ = tmp.Close()
		return err
	}
	if err = tmp.Close(); err != nil {
		return err
	}
	return os.Rename(tmpName, cacheFile)
}
// sha256Hex returns the SHA-256 digest of b as a lowercase hexadecimal string.
func sha256Hex(b []byte) string {
	hasher := sha256.New()
	// hash.Hash.Write is documented never to return an error.
	_, _ = hasher.Write(b)
	return hex.EncodeToString(hasher.Sum(nil))
}
// writeUserError sends ue to the client as a JSON object with the keys
// "error_id" and "message", using ue.Status as the HTTP status code. A zero
// Status is reported as 500 Internal Server Error.
func writeUserError(ctx *context.Context, ue userError) {
	code := http.StatusInternalServerError
	if ue.Status != 0 {
		code = ue.Status
	}

	header := ctx.Resp.Header()
	header.Set("Content-Type", "application/json; charset=utf-8")
	header.Set("X-Content-Type-Options", "nosniff")
	ctx.Resp.WriteHeader(code)

	envelope := struct {
		ErrorID string `json:"error_id"`
		Message string `json:"message"`
	}{
		ErrorID: ue.ErrorID,
		Message: ue.Message,
	}
	// The status line has already been sent, so an encode failure cannot be
	// surfaced to the client.
	_ = json.NewEncoder(ctx.Resp).Encode(envelope)
}