/**
 * Upload Route - POST /api/upload
 * Handles PDF file uploads with validation, storage, and OCR queue processing
 */

import express from 'express';
import multer from 'multer';
import { v4 as uuidv4 } from 'uuid';
import crypto from 'crypto';
import fs from 'fs/promises';
import path from 'path';
import { fileURLToPath } from 'url';
import { dirname, join } from 'path';
import { getDb } from '../db/db.js';
import { validateFile, sanitizeFilename } from '../services/file-safety.js';
import { addOcrJob } from '../services/queue.js';

const __dirname = dirname(fileURLToPath(import.meta.url));
const router = express.Router();

// Fallback upload size limit (50MB) used when MAX_FILE_SIZE is unset or invalid
const DEFAULT_MAX_FILE_SIZE = 52428800;

/**
 * Resolve the upload size limit from the MAX_FILE_SIZE environment variable.
 * Parsed as base-10; anything that is not a positive integer falls back to
 * the default so multer never receives NaN as its limit.
 *
 * @returns {number} Maximum accepted file size in bytes
 */
function resolveMaxFileSize() {
  const parsed = Number.parseInt(process.env.MAX_FILE_SIZE || '', 10);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : DEFAULT_MAX_FILE_SIZE;
}

// Configure multer for memory storage (we'll validate before saving)
const upload = multer({
  storage: multer.memoryStorage(),
  limits: {
    fileSize: resolveMaxFileSize()
  }
});

const UPLOAD_DIR = process.env.UPLOAD_DIR || join(__dirname, '../../uploads');

// Ensure upload directory exists
await fs.mkdir(UPLOAD_DIR, { recursive: true });

/**
 * Best-effort removal of a file written by this route.
 * Failures are logged rather than thrown so they never mask the original error.
 *
 * @param {string} targetPath - Absolute path of the file to remove
 * @returns {Promise<void>}
 */
async function removeStoredFile(targetPath) {
  try {
    await fs.unlink(targetPath);
  } catch (unlinkError) {
    console.error('Error cleaning up file:', unlinkError);
  }
}

/**
 * POST /api/upload
 * Upload PDF file and queue for OCR processing
 *
 * @body {File} file - PDF file to upload
 * @body {string} title - Document title
 * @body {string} documentType - Document type (owner-manual, component-manual, etc)
 * @body {string} organizationId - Organization UUID
 * @body {string} [entityId] - Optional entity UUID
 * @body {string} [subEntityId] - Optional sub-entity UUID
 * @body {string} [componentId] - Optional component UUID
 *
 * @returns {Object} { jobId, documentId, message } with status 201 on success;
 *   { error } with status 400 on validation failure;
 *   { error, message } with status 500 on unexpected failure
 */
router.post('/', upload.single('file'), async (req, res) => {
  // Set once the upload has been written to disk. With memory storage multer
  // never populates req.file.path, so the path must be tracked here for cleanup.
  let savedFilePath = null;
  // Set once a documents row referencing savedFilePath has been inserted.
  let documentRecorded = false;

  try {
    const file = req.file;
    // req.body may be undefined when the request carried no parseable body;
    // default it so the validation below answers 400 instead of throwing.
    const {
      title,
      documentType,
      organizationId,
      entityId,
      componentId,
      subEntityId
    } = req.body ?? {};

    // TODO: Authentication middleware should provide req.user
    const userId = req.user?.id || 'test-user-id'; // Temporary for testing

    // Validate required fields
    if (!file) {
      return res.status(400).json({ error: 'No file uploaded' });
    }

    if (!title || !documentType || !organizationId) {
      return res.status(400).json({
        error: 'Missing required fields: title, documentType, organizationId'
      });
    }

    // Validate file safety
    const validation = await validateFile(file);
    if (!validation.valid) {
      return res.status(400).json({ error: validation.error });
    }

    // Generate UUIDs
    const documentId = uuidv4();
    const jobId = uuidv4();

    // Calculate file hash (SHA256) for deduplication
    const fileHash = crypto
      .createHash('sha256')
      .update(file.buffer)
      .digest('hex');

    // Sanitize filename; the stored name is derived from the document id so
    // the client-supplied name never reaches the filesystem path.
    const sanitizedFilename = sanitizeFilename(file.originalname);
    const fileExt = path.extname(sanitizedFilename);
    const storedFilename = `${documentId}${fileExt}`;
    const filePath = join(UPLOAD_DIR, storedFilename);

    // Save file to disk
    await fs.writeFile(filePath, file.buffer);
    savedFilePath = filePath;

    // Get database connection
    const db = getDb();

    // Check for duplicate file hash (optional deduplication)
    const duplicateCheck = db.prepare(
      'SELECT id, title, file_path FROM documents WHERE file_hash = ? AND organization_id = ? AND status != ?'
    ).get(fileHash, organizationId, 'deleted');

    if (duplicateCheck) {
      // File already exists - optionally return existing document
      // For now, we'll allow duplicates but log it
      console.log(`Duplicate file detected: ${duplicateCheck.id}, proceeding with new upload`);
    }

    const timestamp = Date.now();

    // Insert document record
    const insertDocument = db.prepare(`
      INSERT INTO documents (
        id, organization_id, entity_id, sub_entity_id, component_id, uploaded_by,
        title, document_type, file_path, file_name, file_size, file_hash, mime_type,
        status, created_at, updated_at
      ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    `);

    insertDocument.run(
      documentId,
      organizationId,
      entityId || null,
      subEntityId || null,
      componentId || null,
      userId,
      title,
      documentType,
      filePath,
      sanitizedFilename,
      file.size,
      fileHash,
      'application/pdf',
      'processing',
      timestamp,
      timestamp
    );
    documentRecorded = true;

    // Insert OCR job record
    const insertJob = db.prepare(`
      INSERT INTO ocr_jobs (
        id, document_id, status, progress, created_at
      ) VALUES (?, ?, ?, ?, ?)
    `);

    insertJob.run(
      jobId,
      documentId,
      'pending',
      0,
      timestamp
    );

    // Queue OCR job
    await addOcrJob(documentId, jobId, {
      filePath,
      fileName: sanitizedFilename,
      organizationId,
      userId
    });

    // Return success response
    res.status(201).json({
      jobId,
      documentId,
      message: 'File uploaded successfully and queued for processing'
    });
  } catch (error) {
    console.error('Upload error:', error);

    // Remove the stored file only while nothing in the database points at it.
    // Once the documents row exists the file is kept so the row's file_path
    // stays valid.
    // NOTE(review): a failure after the row is inserted leaves that row in
    // 'processing'; confirm whether it should be retried or marked failed.
    if (savedFilePath && !documentRecorded) {
      await removeStoredFile(savedFilePath);
    }

    res.status(500).json({
      error: 'Upload failed',
      message: error.message
    });
  }
});

export default router;