235 lines
6.8 KiB
Go
235 lines
6.8 KiB
Go
// SPDX-License-Identifier: MIT
|
|
|
|
package pdfexport
|
|
|
|
import (
	"crypto/sha256"
	"encoding/hex"
	"encoding/json"
	"fmt"
	"io"
	"mime"
	"net/http"
	"os"
	"path"
	"path/filepath"
	"strings"
	"time"

	"code.gitea.io/gitea/modules/git"
	"code.gitea.io/gitea/modules/log"
	"code.gitea.io/gitea/modules/markup"
	"code.gitea.io/gitea/modules/setting"
	"code.gitea.io/gitea/services/context"
)
|
|
|
|
const (
	// maxMarkdownBytes is the largest Markdown input, in bytes, accepted for
	// export (4 MiB).
	maxMarkdownBytes = 4 * 1024 * 1024

	// ManifestSHA must be kept in sync with worker/pdf/manifest.json for cache
	// key stability. It is sent to the worker and is part of the cache key
	// (v0.1 determinism requirement).
	ManifestSHA = "e0e9ebe129de8fabcf2aab0a9509f248cd907d3684542e8d42e7e45b747cc956"
)
|
|
|
|
// ServeMarkdownPDF handles GET /{owner}/{repo}/raw/{path}?ref=<sha>&format=pdf
|
|
// It returns either PDF bytes or a safe JSON error envelope.
|
|
func ServeMarkdownPDF(ctx *context.Context) {
|
|
if strings.ToLower(ctx.FormString("format")) != "pdf" {
|
|
ctx.Status(http.StatusNotFound)
|
|
return
|
|
}
|
|
|
|
// Determine effective file path and commit.
|
|
effectivePath, commit, ue := resolvePathAndCommit(ctx)
|
|
if ue.ErrorID != "" {
|
|
writeUserError(ctx, ue)
|
|
return
|
|
}
|
|
|
|
if !markup.IsMarkupFile(path.Base(effectivePath), "markdown") {
|
|
writeUserError(ctx, errBadRequest("ERR_PDF_NOT_MARKDOWN", "PDF export is only supported for Markdown files."))
|
|
return
|
|
}
|
|
|
|
entry, err := commit.GetTreeEntryByPath(effectivePath)
|
|
if err != nil {
|
|
if git.IsErrNotExist(err) {
|
|
writeUserError(ctx, errNotFound("ERR_PDF_NOT_FOUND", "File not found."))
|
|
return
|
|
}
|
|
log.Error("pdfexport: GetTreeEntryByPath: %v", err)
|
|
writeUserError(ctx, errInternal("ERR_PDF_INTERNAL", "Failed to export PDF."))
|
|
return
|
|
}
|
|
if entry.IsDir() || entry.IsSubModule() {
|
|
writeUserError(ctx, errBadRequest("ERR_PDF_NOT_MARKDOWN", "PDF export is only supported for Markdown files."))
|
|
return
|
|
}
|
|
|
|
blob := entry.Blob()
|
|
rc, err := blob.DataAsync()
|
|
if err != nil {
|
|
log.Error("pdfexport: blob.DataAsync: %v", err)
|
|
writeUserError(ctx, errInternal("ERR_PDF_INTERNAL", "Failed to export PDF."))
|
|
return
|
|
}
|
|
defer func() { _ = rc.Close() }()
|
|
|
|
md, ue := readBounded(rc, maxMarkdownBytes)
|
|
if ue.ErrorID != "" {
|
|
writeUserError(ctx, ue)
|
|
return
|
|
}
|
|
|
|
cfg, ue := configFromSetting()
|
|
if ue.ErrorID != "" {
|
|
writeUserError(ctx, ue)
|
|
return
|
|
}
|
|
cfgHash, err := configHash(cfg)
|
|
if err != nil {
|
|
log.Error("pdfexport: config hash: %v", err)
|
|
writeUserError(ctx, errInternal("ERR_PDF_INTERNAL", "Failed to export PDF."))
|
|
return
|
|
}
|
|
|
|
cacheKey := fmt.Sprintf("%d|%s|%s|%s|%s", ctx.Repo.Repository.ID, commit.ID.String(), effectivePath, cfgHash, ManifestSHA)
|
|
pdf, cacheHit, ue := getOrGeneratePDF(cacheKey, workerInput{
|
|
Markdown: string(md),
|
|
RepoMeta: workerRepoMeta{
|
|
Owner: ctx.Repo.Repository.OwnerName,
|
|
Repo: ctx.Repo.Repository.Name,
|
|
Path: effectivePath,
|
|
RepoID: ctx.Repo.Repository.ID,
|
|
CommitSHA: commit.ID.String(),
|
|
CommitTimeRFC3339: commit.Committer.When.UTC().Format(time.RFC3339),
|
|
},
|
|
Config: workerConfigWrap{PDF: cfg},
|
|
ManifestSHA: ManifestSHA,
|
|
})
|
|
if ue.ErrorID != "" {
|
|
writeUserError(ctx, ue)
|
|
return
|
|
}
|
|
|
|
filename := path.Base(effectivePath)
|
|
if strings.HasSuffix(strings.ToLower(filename), ".md") {
|
|
filename = filename[:len(filename)-3]
|
|
}
|
|
if filename == "" {
|
|
filename = "document"
|
|
}
|
|
filename += ".pdf"
|
|
|
|
ctx.Resp.Header().Set("Content-Type", "application/pdf")
|
|
ctx.Resp.Header().Set("X-Content-Type-Options", "nosniff")
|
|
ctx.Resp.Header().Set("Content-Disposition", fmt.Sprintf("inline; filename=%q", filename))
|
|
if cacheHit {
|
|
ctx.Resp.Header().Set("X-Forgejo-PDF-Cache", "HIT")
|
|
} else {
|
|
ctx.Resp.Header().Set("X-Forgejo-PDF-Cache", "MISS")
|
|
}
|
|
ctx.Resp.WriteHeader(http.StatusOK)
|
|
_, _ = ctx.Resp.Write(pdf)
|
|
}
|
|
|
|
func resolvePathAndCommit(ctx *context.Context) (string, *git.Commit, userError) {
|
|
if ctx.Repo.Repository == nil || ctx.Repo.GitRepo == nil {
|
|
return "", nil, errInternal("ERR_PDF_INTERNAL", "Failed to export PDF.")
|
|
}
|
|
|
|
ref := strings.TrimSpace(ctx.FormString("ref"))
|
|
commit := ctx.Repo.Commit
|
|
if ref != "" {
|
|
c, err := ctx.Repo.GitRepo.GetCommit(ref)
|
|
if err != nil {
|
|
if git.IsErrNotExist(err) {
|
|
return "", nil, errNotFound("ERR_PDF_REF_NOT_FOUND", "Commit not found.")
|
|
}
|
|
log.Error("pdfexport: GetCommit(ref): %v", err)
|
|
return "", nil, errInternal("ERR_PDF_INTERNAL", "Failed to export PDF.")
|
|
}
|
|
commit = c
|
|
}
|
|
if commit == nil {
|
|
return "", nil, errInternal("ERR_PDF_INTERNAL", "Failed to export PDF.")
|
|
}
|
|
|
|
// For the legacy /raw/* route with an explicit ref query parameter, treat the wildcard as the filepath
|
|
// (avoid legacy ref guessing for deterministic cache keys).
|
|
effectivePath := ctx.Repo.TreePath
|
|
if ref != "" && isRawLegacyPath(ctx) {
|
|
effectivePath = strings.TrimPrefix(ctx.Params("*"), "/")
|
|
}
|
|
if effectivePath == "" {
|
|
return "", nil, errBadRequest("ERR_PDF_BAD_REQUEST", "Invalid request.")
|
|
}
|
|
return effectivePath, commit, userError{}
|
|
}
|
|
|
|
func isRawLegacyPath(ctx *context.Context) bool {
|
|
p := ctx.Req.URL.Path
|
|
idx := strings.Index(p, "/raw/")
|
|
if idx < 0 {
|
|
return false
|
|
}
|
|
after := p[idx+len("/raw/"):]
|
|
return !(strings.HasPrefix(after, "branch/") || strings.HasPrefix(after, "tag/") || strings.HasPrefix(after, "commit/") || strings.HasPrefix(after, "blob/"))
|
|
}
|
|
|
|
func readBounded(r io.Reader, limit int64) ([]byte, userError) {
|
|
lr := io.LimitReader(r, limit+1)
|
|
b, err := io.ReadAll(lr)
|
|
if err != nil {
|
|
return nil, errInternal("ERR_PDF_INTERNAL", "Failed to export PDF.")
|
|
}
|
|
if int64(len(b)) > limit {
|
|
return nil, errBadRequest("ERR_PDF_TOO_LARGE", "File is too large to export.")
|
|
}
|
|
return b, userError{}
|
|
}
|
|
|
|
func getOrGeneratePDF(cacheKey string, input workerInput) ([]byte, bool, userError) {
|
|
cacheDir := filepath.Join(setting.AppDataPath, "pdfexport", "cache")
|
|
if err := os.MkdirAll(cacheDir, 0o750); err != nil {
|
|
log.Error("pdfexport: mkdir cache: %v", err)
|
|
return nil, false, errInternal("ERR_PDF_INTERNAL", "Failed to export PDF.")
|
|
}
|
|
cacheFile := filepath.Join(cacheDir, sha256Hex([]byte(cacheKey))+".pdf")
|
|
|
|
if b, err := os.ReadFile(cacheFile); err == nil && len(b) > 0 {
|
|
return b, true, userError{}
|
|
}
|
|
|
|
pdf, ue := runWorker(input)
|
|
if ue.ErrorID != "" {
|
|
return nil, false, ue
|
|
}
|
|
|
|
tmp := cacheFile + ".tmp"
|
|
if err := os.WriteFile(tmp, pdf, 0o640); err == nil {
|
|
_ = os.Rename(tmp, cacheFile)
|
|
} else {
|
|
log.Error("pdfexport: write cache: %v", err)
|
|
}
|
|
|
|
return pdf, false, userError{}
|
|
}
|
|
|
|
// sha256Hex returns the SHA-256 digest of b encoded as a lowercase
// hexadecimal string.
func sha256Hex(b []byte) string {
	hasher := sha256.New()
	_, _ = hasher.Write(b) // hash.Hash documents that Write never returns an error
	return hex.EncodeToString(hasher.Sum(nil))
}
|
|
|
|
func writeUserError(ctx *context.Context, ue userError) {
|
|
status := ue.Status
|
|
if status == 0 {
|
|
status = http.StatusInternalServerError
|
|
}
|
|
ctx.Resp.Header().Set("Content-Type", "application/json; charset=utf-8")
|
|
ctx.Resp.Header().Set("X-Content-Type-Options", "nosniff")
|
|
ctx.Resp.WriteHeader(status)
|
|
_ = json.NewEncoder(ctx.Resp).Encode(map[string]string{
|
|
"error_id": ue.ErrorID,
|
|
"message": ue.Message,
|
|
})
|
|
}
|