// navidocs/server/routes/images.js

/**
 * Images Route - API endpoints for image retrieval
 * Handles serving extracted images from documents
 */

import express from 'express';
import { getDb } from '../db/db.js';
import path from 'path';
import { fileURLToPath } from 'url';
import { dirname } from 'path';
import fs from 'fs';
import rateLimit from 'express-rate-limit';

// Derive this module's file path and containing directory from its URL;
// __dirname is used further down to locate the project root on disk.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Router instance on which every image endpoint in this file is registered.
const router = express.Router();

/**
 * Read an integer setting from the environment.
 *
 * Parses with an explicit radix of 10 so values such as "0x10" are not
 * interpreted as hexadecimal, and falls back to `fallback` when the variable
 * is unset, empty, or not numeric (instead of handing NaN to the limiter).
 *
 * @param {string} name - Environment variable name
 * @param {number} fallback - Value used when the variable is missing or invalid
 * @returns {number} Parsed integer or the fallback
 */
const readIntEnv = (name, fallback) => {
  const parsed = Number.parseInt(process.env[name] || '', 10);
  return Number.isNaN(parsed) ? fallback : parsed;
};

// Rate limiter for image endpoints (more permissive than general API).
// Defaults: 200 requests per 60 000 ms (1 minute) window; both values can be
// overridden through IMAGE_RATE_LIMIT_WINDOW_MS / IMAGE_RATE_LIMIT_MAX_REQUESTS.
const imageLimiter = rateLimit({
  windowMs: readIntEnv('IMAGE_RATE_LIMIT_WINDOW_MS', 60000),
  max: readIntEnv('IMAGE_RATE_LIMIT_MAX_REQUESTS', 200),
  standardHeaders: true,
  legacyHeaders: false,
  message: 'Too many image requests, please try again later'
});

/**
 * Decide whether a user may read a document.
 *
 * First looks the document up by id; when it does not exist the result
 * carries a 404. Otherwise a single UNION query grants access if any of the
 * following rows exists: a user_organizations row linking the user to the
 * document's organization, a documents row where the user is `uploaded_by`,
 * or a document_shares row where the user is `shared_with`.
 *
 * @param {string} documentId - Id of the document being requested
 * @param {string} userId - Id of the requesting user
 * @param {Object} db - Database handle exposing `prepare(sql).get(...params)`
 * @returns {Promise<Object>} `{ hasAccess: true, document }` on success, or
 *   `{ hasAccess: false, error, status }` with status 404 or 403
 */
async function verifyDocumentAccess(documentId, userId, db) {
  const documentRow = db
    .prepare('SELECT id, organization_id FROM documents WHERE id = ?')
    .get(documentId);

  if (!documentRow) {
    return { hasAccess: false, error: 'Document not found', status: 404 };
  }

  const accessSql = `
    SELECT 1 FROM user_organizations WHERE user_id = ? AND organization_id = ?
    UNION SELECT 1 FROM documents WHERE id = ? AND uploaded_by = ?
    UNION SELECT 1 FROM document_shares WHERE document_id = ? AND shared_with = ?
  `;

  // Parameters are positional: one (subject, user) pair per UNION branch.
  const accessParams = [
    userId, documentRow.organization_id,
    documentId, userId,
    documentId, userId
  ];
  const grant = db.prepare(accessSql).get(...accessParams);

  return grant
    ? { hasAccess: true, document: documentRow }
    : { hasAccess: false, error: 'Access denied', status: 403 };
}

/**
 * GET /api/documents/:id/images
 * List metadata for every image extracted from one document, ordered by page
 * number and then by image index within the page.
 *
 * Responds 400 for a malformed document id, 404/403 when the access check
 * fails, and 500 (with the error message) on any unexpected failure.
 * When `req.user` is not populated the handler falls back to the placeholder
 * user id 'test-user-id' (authentication middleware is still a TODO).
 *
 * @param {string} id - Document UUID
 * @returns {Object} `{ documentId, imageCount, images }`
 */
router.get('/documents/:id/images', async (req, res) => {
  try {
    const documentId = req.params.id;

    // Reject anything that is not shaped like a UUID before touching the DB
    const isUuid = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i.test(documentId);
    if (!isUuid) {
      return res.status(400).json({ error: 'Invalid document ID format' });
    }

    // TODO: Authentication middleware should provide req.user
    const requesterId = req.user?.id || 'test-user-id';
    const db = getDb();

    // Enforce document-level permissions
    const access = await verifyDocumentAccess(documentId, requesterId, db);
    if (!access.hasAccess) {
      return res.status(access.status).json({ error: access.error });
    }

    // Fetch every image row belonging to the document
    const rows = db.prepare(`
      SELECT
        id,
        documentId,
        pageNumber,
        imageIndex,
        imagePath,
        imageFormat,
        width,
        height,
        position,
        extractedText,
        textConfidence,
        anchorTextBefore,
        anchorTextAfter,
        createdAt
      FROM document_images
      WHERE documentId = ?
      ORDER BY pageNumber ASC, imageIndex ASC
    `).all(documentId);

    // Shape each row for the API: the on-disk imagePath is left out, the
    // stored position JSON is decoded, and a fetch URL is attached.
    const toApiImage = (row) => {
      const position = row.position ? JSON.parse(row.position) : null;
      return {
        id: row.id,
        documentId: row.documentId,
        pageNumber: row.pageNumber,
        imageIndex: row.imageIndex,
        imageFormat: row.imageFormat,
        width: row.width,
        height: row.height,
        position,
        extractedText: row.extractedText,
        textConfidence: row.textConfidence,
        anchorTextBefore: row.anchorTextBefore,
        anchorTextAfter: row.anchorTextAfter,
        createdAt: row.createdAt,
        imageUrl: `/api/images/${row.id}`
      };
    };
    const payloadImages = rows.map(toApiImage);
    const imageCount = payloadImages.length;

    console.log(`Retrieved ${imageCount} images for document ${documentId}`);

    res.json({
      documentId,
      imageCount,
      images: payloadImages
    });

  } catch (error) {
    console.error('Get document images error:', error);
    res.status(500).json({
      error: 'Failed to retrieve images',
      message: error.message
    });
  }
});

/**
 * GET /api/documents/:id/pages/:pageNum/images
 * Get images for a specific page of a document, ordered by image index.
 *
 * Responds 400 for a malformed document id or page number, 404/403 when the
 * access check fails, 404 when the page has no document_pages row, and 500
 * (with the error message) on any unexpected failure.
 * When `req.user` is not populated the handler falls back to the placeholder
 * user id 'test-user-id' (authentication middleware is still a TODO).
 *
 * @param {string} id - Document UUID
 * @param {number} pageNum - Page number (1-based, decimal digits only)
 * @returns {Object} `{ documentId, pageNumber, imageCount, images }`
 */
router.get('/documents/:id/pages/:pageNum/images', async (req, res) => {
  try {
    const { id, pageNum } = req.params;

    // Validate UUID format
    const uuidRegex = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
    if (!uuidRegex.test(id)) {
      return res.status(400).json({ error: 'Invalid document ID format' });
    }

    // Validate page number.
    // Only plain decimal digits are accepted: a bare parseInt() would also
    // take "0x10" (hex 16), "2abc" (2) or "1.5" (1) and silently serve a
    // page the caller did not literally ask for.
    const pageNumber = /^\d+$/.test(pageNum) ? Number.parseInt(pageNum, 10) : Number.NaN;
    if (!Number.isSafeInteger(pageNumber) || pageNumber < 1) {
      return res.status(400).json({ error: 'Invalid page number' });
    }

    // TODO: Authentication middleware should provide req.user
    const userId = req.user?.id || 'test-user-id';
    const db = getDb();

    // Verify document access
    const accessCheck = await verifyDocumentAccess(id, userId, db);
    if (!accessCheck.hasAccess) {
      return res.status(accessCheck.status).json({ error: accessCheck.error });
    }

    // Verify page exists, so a missing page yields 404 rather than an empty list
    const page = db.prepare(`
      SELECT id, page_number, document_id
      FROM document_pages
      WHERE document_id = ? AND page_number = ?
    `).get(id, pageNumber);

    if (!page) {
      return res.status(404).json({
        error: 'Page not found',
        message: `Page ${pageNumber} does not exist in this document`
      });
    }

    // Get images for the specific page
    const images = db.prepare(`
      SELECT
        id,
        documentId,
        pageNumber,
        imageIndex,
        imagePath,
        imageFormat,
        width,
        height,
        position,
        extractedText,
        textConfidence,
        anchorTextBefore,
        anchorTextAfter,
        createdAt
      FROM document_images
      WHERE documentId = ? AND pageNumber = ?
      ORDER BY imageIndex ASC
    `).all(id, pageNumber);

    // Format response: the on-disk imagePath is intentionally not exposed,
    // the stored position JSON is decoded, and a fetch URL is attached.
    const formattedImages = images.map(img => ({
      id: img.id,
      documentId: img.documentId,
      pageNumber: img.pageNumber,
      imageIndex: img.imageIndex,
      imageFormat: img.imageFormat,
      width: img.width,
      height: img.height,
      position: img.position ? JSON.parse(img.position) : null,
      extractedText: img.extractedText,
      textConfidence: img.textConfidence,
      anchorTextBefore: img.anchorTextBefore,
      anchorTextAfter: img.anchorTextAfter,
      createdAt: img.createdAt,
      imageUrl: `/api/images/${img.id}`
    }));

    console.log(`Retrieved ${formattedImages.length} images for document ${id} page ${pageNumber}`);

    res.json({
      documentId: id,
      pageNumber: pageNumber,
      imageCount: formattedImages.length,
      images: formattedImages
    });

  } catch (error) {
    console.error('Get page images error:', error);
    res.status(500).json({
      error: 'Failed to retrieve page images',
      message: error.message
    });
  }
});

/**
 * Report whether `targetPath` lies strictly inside `baseDir`.
 *
 * The comparison is made against the base directory plus a trailing path
 * separator, so a sibling directory that merely shares the prefix
 * (e.g. "uploads-evil" next to "uploads") is not treated as inside.
 *
 * @param {string} baseDir - Directory that must contain the target
 * @param {string} targetPath - Path to test
 * @returns {boolean} true when the normalized target is below the base
 */
function isPathInsideDirectory(baseDir, targetPath) {
  const base = path.normalize(baseDir);
  const target = path.normalize(targetPath);
  const baseWithSep = base.endsWith(path.sep) ? base : base + path.sep;
  return target.startsWith(baseWithSep);
}

/**
 * Drop the success-only image headers so an error body is not labelled as an
 * image or cached for a year.
 *
 * @param {Object} res - Express response whose headers have not been sent yet
 */
function clearImageHeaders(res) {
  res.removeHeader('Content-Type');
  res.removeHeader('Cache-Control');
  res.removeHeader('Content-Disposition');
}

/**
 * GET /api/images/:imageId
 * Serve image file as PNG/JPEG stream
 *
 * Responds 400 for a malformed image id, 404 when the image row or its file
 * is missing, 404/403 when the document access check fails, 403 when the
 * stored path resolves outside the uploads directory, and 500 on unexpected
 * failures. When `req.user` is not populated the handler falls back to the
 * placeholder user id 'test-user-id' (authentication middleware is a TODO).
 *
 * @param {string} imageId - Image UUID
 * @returns {Stream} Image file stream with proper Content-Type
 */
router.get('/images/:imageId', imageLimiter, async (req, res) => {
  try {
    const { imageId } = req.params;

    // Validate image ID format (img_<uuid>_p<num>_<num>_<timestamp> or just UUID)
    const imageIdRegex = /^(img_[0-9a-f-]+_p\d+_\d+_\d+|[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})$/i;
    if (!imageIdRegex.test(imageId)) {
      return res.status(400).json({ error: 'Invalid image ID format' });
    }

    // TODO: Authentication middleware should provide req.user
    const userId = req.user?.id || 'test-user-id';
    const db = getDb();

    // Get image metadata
    const image = db.prepare(`
      SELECT
        id,
        documentId,
        imagePath,
        imageFormat
      FROM document_images
      WHERE id = ?
    `).get(imageId);

    if (!image) {
      return res.status(404).json({ error: 'Image not found' });
    }

    // Verify document access
    const accessCheck = await verifyDocumentAccess(image.documentId, userId, db);
    if (!accessCheck.hasAccess) {
      return res.status(accessCheck.status).json({ error: accessCheck.error });
    }

    // Resolve absolute path relative to project root
    // imagePath is like "/uploads/..." so we need to join with project root
    const projectRoot = path.join(__dirname, '../..');
    const uploadDir = path.join(projectRoot, 'uploads');
    const absPath = path.join(projectRoot, image.imagePath);

    // Security check: ensure file is within expected directory.
    // This runs BEFORE any filesystem access so that the 404/403 difference
    // cannot be used to probe for files outside the uploads directory.
    if (!isPathInsideDirectory(uploadDir, absPath)) {
      console.error(`Security violation: Path traversal attempt - ${absPath}`);
      console.error(`Expected base directory: ${path.normalize(uploadDir)}`);
      console.error(`Actual file path: ${path.normalize(absPath)}`);
      return res.status(403).json({ error: 'Access denied' });
    }

    if (!fs.existsSync(absPath)) {
      console.error(`Image file not found: ${absPath}`);
      return res.status(404).json({
        error: 'Image file not found',
        message: 'The image file is missing from storage'
      });
    }

    // Set Content-Type based on image format (anything that is not JPEG is
    // served as PNG)
    const contentType = image.imageFormat === 'jpeg' || image.imageFormat === 'jpg'
      ? 'image/jpeg'
      : 'image/png';

    // Set headers.
    // The response is gated by a per-user access check, so it is marked
    // "private": browsers may cache it for a year, shared caches may not
    // store it and replay it to other users.
    res.setHeader('Content-Type', contentType);
    res.setHeader('Cache-Control', 'private, max-age=31536000');
    res.setHeader('Content-Disposition', `inline; filename="image-${imageId}.${image.imageFormat}"`);

    // Stream the file
    const fileStream = fs.createReadStream(absPath);

    fileStream.on('error', (error) => {
      console.error('File stream error:', error);
      if (res.headersSent) {
        // Part of the image is already on the wire; pipe() does not end the
        // response on a source error, so tear the connection down instead of
        // leaving the client waiting.
        res.destroy();
        return;
      }
      clearImageHeaders(res);
      res.status(500).json({
        error: 'Failed to stream image',
        message: error.message
      });
    });

    // Release the file descriptor if the client disconnects mid-transfer
    // (a no-op when the stream has already finished).
    res.on('close', () => {
      fileStream.destroy();
    });

    fileStream.pipe(res);

    console.log(`Serving image ${imageId} (${contentType}) from ${absPath}`);

  } catch (error) {
    console.error('Serve image error:', error);
    if (!res.headersSent) {
      // Image headers may already have been set before the failure.
      clearImageHeaders(res);
      res.status(500).json({
        error: 'Failed to serve image',
        message: error.message
      });
    }
  }
});

// Default export: the Express router carrying the image routes registered in this file.
export default router;