// SPDX-License-Identifier: MIT package pdfexport import ( "crypto/sha256" "encoding/hex" "encoding/json" "fmt" "io" "net/http" "os" "path" "path/filepath" "strings" "time" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/markup" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/services/context" ) const ( maxMarkdownBytes = 4 << 20 // ManifestSHA must be kept in sync with worker/pdf/manifest.json for cache key stability. // It is used as part of the worker input and cache key (v0.1 determinism requirement). ManifestSHA = "e0e9ebe129de8fabcf2aab0a9509f248cd907d3684542e8d42e7e45b747cc956" ) // ServeMarkdownPDF handles GET /{owner}/{repo}/raw/{path}?ref=&format=pdf // It returns either PDF bytes or a safe JSON error envelope. func ServeMarkdownPDF(ctx *context.Context) { if strings.ToLower(ctx.FormString("format")) != "pdf" { ctx.Status(http.StatusNotFound) return } // Determine effective file path and commit. effectivePath, commit, ue := resolvePathAndCommit(ctx) if ue.ErrorID != "" { writeUserError(ctx, ue) return } if !markup.IsMarkupFile(path.Base(effectivePath), "markdown") { writeUserError(ctx, errBadRequest("ERR_PDF_NOT_MARKDOWN", "PDF export is only supported for Markdown files.")) return } entry, err := commit.GetTreeEntryByPath(effectivePath) if err != nil { if git.IsErrNotExist(err) { writeUserError(ctx, errNotFound("ERR_PDF_NOT_FOUND", "File not found.")) return } log.Error("pdfexport: GetTreeEntryByPath: %v", err) writeUserError(ctx, errInternal("ERR_PDF_INTERNAL", "Failed to export PDF.")) return } if entry.IsDir() || entry.IsSubModule() { writeUserError(ctx, errBadRequest("ERR_PDF_NOT_MARKDOWN", "PDF export is only supported for Markdown files.")) return } blob := entry.Blob() rc, err := blob.DataAsync() if err != nil { log.Error("pdfexport: blob.DataAsync: %v", err) writeUserError(ctx, errInternal("ERR_PDF_INTERNAL", "Failed to export PDF.")) return } defer func() { _ = rc.Close() }() md, ue := readBounded(rc, maxMarkdownBytes) if ue.ErrorID != "" { writeUserError(ctx, ue) return } cfg, ue := configFromSetting() if ue.ErrorID != "" { writeUserError(ctx, ue) return } cfgHash, err := configHash(cfg) if err != nil { log.Error("pdfexport: config hash: %v", err) writeUserError(ctx, errInternal("ERR_PDF_INTERNAL", "Failed to export PDF.")) return } cacheKey := fmt.Sprintf("%d|%s|%s|%s|%s", ctx.Repo.Repository.ID, commit.ID.String(), effectivePath, cfgHash, ManifestSHA) pdf, cacheHit, ue := getOrGeneratePDF(cacheKey, workerInput{ Markdown: string(md), RepoMeta: workerRepoMeta{ Owner: ctx.Repo.Repository.OwnerName, Repo: ctx.Repo.Repository.Name, Path: effectivePath, RepoID: ctx.Repo.Repository.ID, CommitSHA: commit.ID.String(), CommitTimeRFC3339: commit.Committer.When.UTC().Format(time.RFC3339), }, Config: workerConfigWrap{PDF: cfg}, ManifestSHA: ManifestSHA, }) if ue.ErrorID != "" { writeUserError(ctx, ue) return } filename := path.Base(effectivePath) if strings.HasSuffix(strings.ToLower(filename), ".md") { filename = filename[:len(filename)-3] } if filename == "" { filename = "document" } filename += ".pdf" ctx.Resp.Header().Set("Content-Type", "application/pdf") ctx.Resp.Header().Set("X-Content-Type-Options", "nosniff") ctx.Resp.Header().Set("Content-Disposition", fmt.Sprintf("inline; filename=%q", filename)) if cacheHit { ctx.Resp.Header().Set("X-Forgejo-PDF-Cache", "HIT") } else { ctx.Resp.Header().Set("X-Forgejo-PDF-Cache", "MISS") } ctx.Resp.WriteHeader(http.StatusOK) _, _ = ctx.Resp.Write(pdf) } func resolvePathAndCommit(ctx *context.Context) (string, *git.Commit, userError) { if ctx.Repo.Repository == nil || ctx.Repo.GitRepo == nil { return "", nil, errInternal("ERR_PDF_INTERNAL", "Failed to export PDF.") } ref := strings.TrimSpace(ctx.FormString("ref")) commit := ctx.Repo.Commit if ref != "" { c, err := ctx.Repo.GitRepo.GetCommit(ref) if err != nil { if git.IsErrNotExist(err) { return "", nil, errNotFound("ERR_PDF_REF_NOT_FOUND", "Commit not found.") } log.Error("pdfexport: GetCommit(ref): %v", err) return "", nil, errInternal("ERR_PDF_INTERNAL", "Failed to export PDF.") } commit = c } if commit == nil { return "", nil, errInternal("ERR_PDF_INTERNAL", "Failed to export PDF.") } // For the legacy /raw/* route with an explicit ref query parameter, treat the wildcard as the filepath // (avoid legacy ref guessing for deterministic cache keys). effectivePath := ctx.Repo.TreePath if ref != "" && isRawLegacyPath(ctx) { effectivePath = strings.TrimPrefix(ctx.Params("*"), "/") } if effectivePath == "" { return "", nil, errBadRequest("ERR_PDF_BAD_REQUEST", "Invalid request.") } return effectivePath, commit, userError{} } func isRawLegacyPath(ctx *context.Context) bool { p := ctx.Req.URL.Path idx := strings.Index(p, "/raw/") if idx < 0 { return false } after := p[idx+len("/raw/"):] return !(strings.HasPrefix(after, "branch/") || strings.HasPrefix(after, "tag/") || strings.HasPrefix(after, "commit/") || strings.HasPrefix(after, "blob/")) } func readBounded(r io.Reader, limit int64) ([]byte, userError) { lr := io.LimitReader(r, limit+1) b, err := io.ReadAll(lr) if err != nil { return nil, errInternal("ERR_PDF_INTERNAL", "Failed to export PDF.") } if int64(len(b)) > limit { return nil, errBadRequest("ERR_PDF_TOO_LARGE", "File is too large to export.") } return b, userError{} } func getOrGeneratePDF(cacheKey string, input workerInput) ([]byte, bool, userError) { cacheDir := filepath.Join(setting.AppDataPath, "pdfexport", "cache") if err := os.MkdirAll(cacheDir, 0o750); err != nil { log.Error("pdfexport: mkdir cache: %v", err) return nil, false, errInternal("ERR_PDF_INTERNAL", "Failed to export PDF.") } cacheFile := filepath.Join(cacheDir, sha256Hex([]byte(cacheKey))+".pdf") if b, err := os.ReadFile(cacheFile); err == nil && len(b) > 0 { return b, true, userError{} } pdf, ue := runWorker(input) if ue.ErrorID != "" { return nil, false, ue } tmp := cacheFile + ".tmp" if err := os.WriteFile(tmp, pdf, 0o640); err == nil { _ = os.Rename(tmp, cacheFile) } else { log.Error("pdfexport: write cache: %v", err) } return pdf, false, userError{} } func sha256Hex(b []byte) string { h := sha256.Sum256(b) return hex.EncodeToString(h[:]) } func writeUserError(ctx *context.Context, ue userError) { status := ue.Status if status == 0 { status = http.StatusInternalServerError } ctx.Resp.Header().Set("Content-Type", "application/json; charset=utf-8") ctx.Resp.Header().Set("X-Content-Type-Options", "nosniff") ctx.Resp.WriteHeader(status) _ = json.NewEncoder(ctx.Resp).Encode(map[string]string{ "error_id": ue.ErrorID, "message": ue.Message, }) }