This commit addresses multiple critical fixes and adds new functionality for the NaviDocs local testing environment (port 8083): Search Fixes: - Fixed search to use backend /api/search instead of direct Meilisearch - Resolves network accessibility issue when accessing from external IPs - Search now works from http://172.29.75.55:8083/search PDF Text Selection: - Added PDF.js text layer for selectable text - Imported pdf_viewer.css for proper text layer styling - Changed text layer opacity to 1 for better interaction - Added user-select: text for improved text selection - Pink selection highlight (rgba(255, 92, 178, 0.3)) Database Cleanup: - Created cleanup scripts to remove 20 duplicate documents - Removed 753 orphaned entries from Meilisearch index - Cleaned 17 document folders from filesystem - Kept only newest version of each document - Scripts: clean-duplicates.js, clean-meilisearch-orphans.js Auto-Fill Feature: - New /api/upload/quick-ocr endpoint for first-page OCR - Automatically extracts metadata from PDFs on file selection - Detects: boat make, model, year, name, and document title - Checks both OCR text and filename for boat name - Auto-fills upload form with extracted data - Shows loading indicator during metadata extraction - Graceful fallback to filename if OCR fails Tenant Management: - Updated organization ID to use boat name as tenant - Falls back to "Liliane 1" for single-tenant setup - Each boat becomes a unique tenant in the system Files Changed: - client/src/views/DocumentView.vue - Text layer implementation - client/src/composables/useSearch.js - Backend API integration - client/src/components/UploadModal.vue - Auto-fill feature - server/routes/quick-ocr.js - OCR endpoint (new) - server/index.js - Route registration - server/scripts/* - Cleanup utilities (new) Testing: All features tested on local deployment at http://172.29.75.55:8083 - Backend: http://localhost:8001 - Frontend: http://localhost:8083 - Meilisearch: http://localhost:7700 🤖 
Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude <noreply@anthropic.com>
348 lines
10 KiB
JavaScript
348 lines
10 KiB
JavaScript
/**
|
|
* Images Route - API endpoints for image retrieval
|
|
* Handles serving extracted images from documents
|
|
*/
|
|
|
|
import express from 'express';
|
|
import { getDb } from '../db/db.js';
|
|
import path from 'path';
|
|
import { fileURLToPath } from 'url';
|
|
import { dirname } from 'path';
|
|
import fs from 'fs';
|
|
import rateLimit from 'express-rate-limit';
|
|
|
|
const __filename = fileURLToPath(import.meta.url);
|
|
const __dirname = dirname(__filename);
|
|
|
|
const router = express.Router();
|
|
|
|
/**
 * Read an integer setting from the environment.
 *
 * The value is always parsed as base 10 (so a value such as "0x10" is not
 * silently read as hexadecimal). When the variable is unset, empty, or does
 * not start with a decimal number, the fallback is returned instead of NaN.
 *
 * @param {string} name - Environment variable name
 * @param {number} fallback - Value to use when the variable is missing or not numeric
 * @returns {number} Parsed integer, or the fallback
 */
function readIntEnv(name, fallback) {
  const parsed = Number.parseInt(process.env[name] || '', 10);
  return Number.isNaN(parsed) ? fallback : parsed;
}

// Rate limiter attached to the image-serving route in this file.
// Both the window and the request cap can be overridden via environment variables.
const imageLimiter = rateLimit({
  windowMs: readIntEnv('IMAGE_RATE_LIMIT_WINDOW_MS', 60000), // default: 1 minute
  max: readIntEnv('IMAGE_RATE_LIMIT_MAX_REQUESTS', 200),
  standardHeaders: true,
  legacyHeaders: false,
  message: 'Too many image requests, please try again later'
});
|
|
|
|
/**
 * Decide whether a user may read a document.
 *
 * Looks the document up first; if it exists, access is granted when any one
 * of these rows is found: a membership of the user in the document's
 * organization, the document being uploaded by the user, or a share of the
 * document with the user.
 *
 * @param {string} documentId - ID of the document being requested
 * @param {string} userId - ID of the requesting user
 * @param {Object} db - Database handle exposing prepare(sql).get(...params)
 * @returns {Promise<Object>} `{ hasAccess: true, document }` on success, otherwise
 *   `{ hasAccess: false, error, status }` with status 404 (no such document)
 *   or 403 (no matching grant)
 */
async function verifyDocumentAccess(documentId, userId, db) {
  const documentRow = db
    .prepare('SELECT id, organization_id FROM documents WHERE id = ?')
    .get(documentId);

  if (!documentRow) {
    return { hasAccess: false, error: 'Document not found', status: 404 };
  }

  // One query, three alternative grants: organization member, uploader, or share recipient
  const grantLookup = db.prepare(`
    SELECT 1 FROM user_organizations WHERE user_id = ? AND organization_id = ?
    UNION SELECT 1 FROM documents WHERE id = ? AND uploaded_by = ?
    UNION SELECT 1 FROM document_shares WHERE document_id = ? AND shared_with = ?
  `);
  const grantParams = [
    userId, documentRow.organization_id,
    documentId, userId,
    documentId, userId
  ];
  const grant = grantLookup.get(...grantParams);

  return grant
    ? { hasAccess: true, document: documentRow }
    : { hasAccess: false, error: 'Access denied', status: 403 };
}
|
|
|
|
/**
 * GET /api/documents/:id/images
 * List the metadata of every image stored for one document, ordered by page
 * number and then by image index.
 *
 * Responses:
 *   200 - { documentId, imageCount, images }
 *   400 - the id is not a UUID
 *   403 / 404 - as reported by verifyDocumentAccess
 *   500 - any unexpected failure (including an unparsable stored position)
 *
 * @param {string} id - Document UUID (route parameter)
 * @returns {Object} JSON body with the image metadata array
 */
router.get('/documents/:id/images', async (req, res) => {
  try {
    const id = req.params.id;

    // Only canonical 8-4-4-4-12 hex UUIDs are accepted
    const isUuid = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i.test(id);
    if (!isUuid) {
      res.status(400).json({ error: 'Invalid document ID format' });
      return;
    }

    // TODO: Authentication middleware should provide req.user
    // NOTE(review): without req.user this falls back to the literal 'test-user-id'
    const userId = req.user?.id || 'test-user-id';
    const db = getDb();

    // Stop early when the document is missing or the user has no grant on it
    const { hasAccess, status, error: accessError } = await verifyDocumentAccess(id, userId, db);
    if (!hasAccess) {
      res.status(status).json({ error: accessError });
      return;
    }

    const rows = db.prepare(`
      SELECT
        id,
        documentId,
        pageNumber,
        imageIndex,
        imagePath,
        imageFormat,
        width,
        height,
        position,
        extractedText,
        textConfidence,
        anchorTextBefore,
        anchorTextAfter,
        createdAt
      FROM document_images
      WHERE documentId = ?
      ORDER BY pageNumber ASC, imageIndex ASC
    `).all(id);

    // Build the public shape: the stored file path is left out, the position
    // column is decoded from JSON, and a URL for fetching the image is added
    const formattedImages = [];
    for (const row of rows) {
      const decodedPosition = row.position ? JSON.parse(row.position) : null;
      formattedImages.push({
        id: row.id,
        documentId: row.documentId,
        pageNumber: row.pageNumber,
        imageIndex: row.imageIndex,
        imageFormat: row.imageFormat,
        width: row.width,
        height: row.height,
        position: decodedPosition,
        extractedText: row.extractedText,
        textConfidence: row.textConfidence,
        anchorTextBefore: row.anchorTextBefore,
        anchorTextAfter: row.anchorTextAfter,
        createdAt: row.createdAt,
        imageUrl: `/api/images/${row.id}`
      });
    }

    const imageCount = formattedImages.length;
    console.log(`Retrieved ${imageCount} images for document ${id}`);

    res.json({
      documentId: id,
      imageCount,
      images: formattedImages
    });
  } catch (error) {
    console.error('Get document images error:', error);
    res.status(500).json({
      error: 'Failed to retrieve images',
      message: error.message
    });
  }
});
|
|
|
|
/**
 * GET /api/documents/:id/pages/:pageNum/images
 * Get images for a specific page of a document, ordered by image index.
 *
 * Responses:
 *   200 - { documentId, pageNumber, imageCount, images }
 *   400 - the id is not a UUID, or pageNum is not a positive decimal integer
 *   403 / 404 - as reported by verifyDocumentAccess
 *   404 - the document has no page with that number
 *   500 - any unexpected failure
 *
 * @param {string} id - Document UUID
 * @param {number} pageNum - Page number (1-based)
 * @returns {Object} Array of image metadata for the page
 */
router.get('/documents/:id/pages/:pageNum/images', async (req, res) => {
  try {
    const { id, pageNum } = req.params;

    // Validate UUID format
    const uuidRegex = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
    if (!uuidRegex.test(id)) {
      return res.status(400).json({ error: 'Invalid document ID format' });
    }

    // Validate page number.
    // A bare parseInt() would accept "12abc" (as 12), "1.5" (as 1) and
    // "0x10" (as 16), so require the whole segment to be decimal digits
    // before converting, and reject values too large to be exact integers.
    const pageNumber = /^\d+$/.test(pageNum) ? Number.parseInt(pageNum, 10) : Number.NaN;
    if (!Number.isSafeInteger(pageNumber) || pageNumber < 1) {
      return res.status(400).json({ error: 'Invalid page number' });
    }

    // TODO: Authentication middleware should provide req.user
    // NOTE(review): without req.user this falls back to the literal 'test-user-id'
    const userId = req.user?.id || 'test-user-id';
    const db = getDb();

    // Verify document access
    const accessCheck = await verifyDocumentAccess(id, userId, db);
    if (!accessCheck.hasAccess) {
      return res.status(accessCheck.status).json({ error: accessCheck.error });
    }

    // Verify page exists so that an unknown page yields 404 rather than an empty list
    const page = db.prepare(`
      SELECT id, page_number, document_id
      FROM document_pages
      WHERE document_id = ? AND page_number = ?
    `).get(id, pageNumber);

    if (!page) {
      return res.status(404).json({
        error: 'Page not found',
        message: `Page ${pageNumber} does not exist in this document`
      });
    }

    // Get images for the specific page
    const images = db.prepare(`
      SELECT
        id,
        documentId,
        pageNumber,
        imageIndex,
        imagePath,
        imageFormat,
        width,
        height,
        position,
        extractedText,
        textConfidence,
        anchorTextBefore,
        anchorTextAfter,
        createdAt
      FROM document_images
      WHERE documentId = ? AND pageNumber = ?
      ORDER BY imageIndex ASC
    `).all(id, pageNumber);

    // Format response: the stored file path is left out, position is decoded
    // from JSON, and a URL for fetching the image is added
    const formattedImages = images.map(img => ({
      id: img.id,
      documentId: img.documentId,
      pageNumber: img.pageNumber,
      imageIndex: img.imageIndex,
      imageFormat: img.imageFormat,
      width: img.width,
      height: img.height,
      position: img.position ? JSON.parse(img.position) : null,
      extractedText: img.extractedText,
      textConfidence: img.textConfidence,
      anchorTextBefore: img.anchorTextBefore,
      anchorTextAfter: img.anchorTextAfter,
      createdAt: img.createdAt,
      imageUrl: `/api/images/${img.id}`
    }));

    console.log(`Retrieved ${formattedImages.length} images for document ${id} page ${pageNumber}`);

    res.json({
      documentId: id,
      pageNumber: pageNumber,
      imageCount: formattedImages.length,
      images: formattedImages
    });

  } catch (error) {
    console.error('Get page images error:', error);
    res.status(500).json({
      error: 'Failed to retrieve page images',
      message: error.message
    });
  }
});
|
|
|
|
/**
 * GET /api/images/:imageId
 * Serve image file as PNG/JPEG stream
 *
 * The request is rate limited by imageLimiter. The image row is looked up,
 * access to its parent document is verified, and the stored path is confirmed
 * to lie inside the uploads directory before the file system is touched.
 *
 * Responses:
 *   200 - the image bytes, with Content-Type image/jpeg or image/png
 *   400 - the image ID has an unexpected format
 *   403 - no access to the parent document, or the stored path leaves uploads/
 *   404 - unknown image ID, missing parent document, or file missing on disk
 *   500 - any unexpected failure
 *
 * @param {string} imageId - Image UUID
 * @returns {Stream} Image file stream with proper Content-Type
 */
router.get('/images/:imageId', imageLimiter, async (req, res) => {
  try {
    const { imageId } = req.params;

    // Validate image ID format (img_<uuid>_p<num>_<num>_<timestamp> or just UUID)
    const imageIdRegex = /^(img_[0-9a-f-]+_p\d+_\d+_\d+|[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})$/i;
    if (!imageIdRegex.test(imageId)) {
      return res.status(400).json({ error: 'Invalid image ID format' });
    }

    // TODO: Authentication middleware should provide req.user
    // NOTE(review): without req.user this falls back to the literal 'test-user-id'
    const userId = req.user?.id || 'test-user-id';
    const db = getDb();

    // Get image metadata
    const image = db.prepare(`
      SELECT
        id,
        documentId,
        imagePath,
        imageFormat
      FROM document_images
      WHERE id = ?
    `).get(imageId);

    if (!image) {
      return res.status(404).json({ error: 'Image not found' });
    }

    // Verify document access
    const accessCheck = await verifyDocumentAccess(image.documentId, userId, db);
    if (!accessCheck.hasAccess) {
      return res.status(accessCheck.status).json({ error: accessCheck.error });
    }

    // Resolve absolute path relative to project root
    // imagePath is like "/uploads/..." so we need to join with project root
    // (path.join also normalizes the result, collapsing any ".." segments)
    const projectRoot = path.join(__dirname, '../..');
    const absPath = path.join(projectRoot, image.imagePath);
    const uploadDir = path.join(projectRoot, 'uploads');

    // Security check: ensure file is within expected directory.
    // This runs BEFORE any file system access so that the 404/403 difference
    // cannot be used to probe for files outside uploads/. The trailing
    // separator matters: without it a sibling such as "<root>/uploads-old/x"
    // would pass a plain prefix comparison.
    if (!absPath.startsWith(uploadDir + path.sep)) {
      console.error(`Security violation: Path traversal attempt - ${absPath}`);
      console.error(`Expected base directory: ${uploadDir}`);
      console.error(`Actual file path: ${absPath}`);
      return res.status(403).json({ error: 'Access denied' });
    }

    if (!fs.existsSync(absPath)) {
      console.error(`Image file not found: ${absPath}`);
      return res.status(404).json({
        error: 'Image file not found',
        message: 'The image file is missing from storage'
      });
    }

    // Set Content-Type based on image format (anything that is not jpeg/jpg is sent as PNG)
    const contentType = image.imageFormat === 'jpeg' || image.imageFormat === 'jpg'
      ? 'image/jpeg'
      : 'image/png';

    // Set headers
    res.setHeader('Content-Type', contentType);
    // Cache for 1 year, but only in the requesting user's own cache: the
    // response is gated by a per-user access check, so shared caches must
    // not store it and hand it to other users.
    res.setHeader('Cache-Control', 'private, max-age=31536000');
    res.setHeader('Content-Disposition', `inline; filename="image-${imageId}.${image.imageFormat}"`);

    // Stream the file
    const fileStream = fs.createReadStream(absPath);

    fileStream.on('error', (error) => {
      console.error('File stream error:', error);

      if (res.headersSent) {
        // Part of the image is already on the wire; pipe() does not close the
        // response on a read error, so tear it down instead of leaving the
        // client waiting forever.
        res.destroy();
        return;
      }

      // The image headers set above must not accompany the error body,
      // otherwise the 500 could be cached for a year.
      res.removeHeader('Cache-Control');
      res.removeHeader('Content-Disposition');
      res.status(500).json({
        error: 'Failed to stream image',
        message: error.message
      });
    });

    fileStream.pipe(res);

    console.log(`Serving image ${imageId} (${contentType}) from ${absPath}`);

  } catch (error) {
    console.error('Serve image error:', error);
    if (!res.headersSent) {
      res.status(500).json({
        error: 'Failed to serve image',
        message: error.message
      });
    }
  }
});
|
|
|
|
export default router;
|