## Backend (server/)

- Express 5 API with security middleware (helmet, rate limiting)
- SQLite database with WAL mode (schema from docs/architecture/)
- Meilisearch integration with tenant tokens
- BullMQ + Redis background job queue
- OCR pipeline with Tesseract.js
- File safety validation (extension, MIME, size)
- 4 API route modules: upload, jobs, search, documents

## Frontend (client/)

- Vue 3 with Composition API (`<script setup>`)
- Vite 5 build system with HMR
- Tailwind CSS (Meilisearch-inspired design)
- UploadModal with drag-and-drop
- FigureZoom component (ported from lilian1)
- Meilisearch search integration with tenant tokens
- Job polling composable
- Clean SVG icons (no emojis)

## Code Extraction

- ✅ manuals.js → UploadModal.vue, useJobPolling.js
- ✅ figure-zoom.js → FigureZoom.vue
- ✅ service-worker.js → client/public/service-worker.js (TODO)
- ✅ glossary.json → Merged into Meilisearch synonyms
- ❌ Discarded: quiz.js, persona.js, gamification.js (Frank-AI junk)

## Documentation

- Complete extraction plan in docs/analysis/
- README with quick start guide
- Architecture summary in docs/architecture/

## Build Status

- Server dependencies: ✅ Installed (234 packages)
- Client dependencies: ✅ Installed (160 packages)
- Client build: ✅ Successful (2.63s)

🤖 Generated with Claude Code

Co-Authored-By: Claude <noreply@anthropic.com>
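The source file below is the upload API route. It hands OCR work to `addOcrJob`, imported from `../services/queue.js`; that queue service is not shown here, so the following is a minimal sketch of what it might look like with BullMQ and Redis. The queue name, job name, Redis connection env vars, payload shape, and retry options are all assumptions, not the repo's actual code; only the `addOcrJob(documentId, jobId, data)` call shape is taken from the route itself.

```js
// Hypothetical sketch of the queue service (not the actual server/services/queue.js).
// Assumes BullMQ with a Redis connection configured via env vars.
import { Queue } from 'bullmq';

const connection = {
  host: process.env.REDIS_HOST || '127.0.0.1',
  port: parseInt(process.env.REDIS_PORT || '6379', 10)
};

// One queue for OCR work; a separate Worker process would run the Tesseract.js pipeline.
const ocrQueue = new Queue('ocr', { connection });

/**
 * Enqueue an OCR job. Mirrors the call made by the upload route:
 *   addOcrJob(documentId, jobId, { filePath, fileName, organizationId, userId })
 */
export async function addOcrJob(documentId, jobId, data) {
  return ocrQueue.add(
    'process-document',
    { documentId, jobId, ...data },
    {
      jobId,                          // reuse the DB job id so status lookups line up
      attempts: 3,                    // retry transient OCR failures
      backoff: { type: 'exponential', delay: 5000 },
      removeOnComplete: true
    }
  );
}
```

Reusing the same UUID stored in the `ocr_jobs` table as the BullMQ job id keeps the queue job and the database record in sync, which is convenient for a job-polling endpoint.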
/**
 * Upload Route - POST /api/upload
 * Handles PDF file uploads with validation, storage, and OCR queue processing
 */

import express from 'express';
import multer from 'multer';
import { v4 as uuidv4 } from 'uuid';
import crypto from 'crypto';
import fs from 'fs/promises';
import path, { dirname, join } from 'path';
import { fileURLToPath } from 'url';
import { getDb } from '../db/db.js';
import { validateFile, sanitizeFilename } from '../services/file-safety.js';
import { addOcrJob } from '../services/queue.js';

const __dirname = dirname(fileURLToPath(import.meta.url));
const router = express.Router();

// Configure multer for memory storage (we'll validate before saving)
const upload = multer({
  storage: multer.memoryStorage(),
  limits: {
    fileSize: parseInt(process.env.MAX_FILE_SIZE || '52428800', 10) // 50MB
  }
});

const UPLOAD_DIR = process.env.UPLOAD_DIR || join(__dirname, '../../uploads');

// Ensure upload directory exists
await fs.mkdir(UPLOAD_DIR, { recursive: true });

/**
 * POST /api/upload
 * Upload PDF file and queue for OCR processing
 *
 * @body {File} file - PDF file to upload
 * @body {string} title - Document title
 * @body {string} documentType - Document type (owner-manual, component-manual, etc)
 * @body {string} organizationId - Organization UUID
 * @body {string} [entityId] - Optional entity UUID
 * @body {string} [subEntityId] - Optional sub-entity UUID
 * @body {string} [componentId] - Optional component UUID
 *
 * @returns {Object} { jobId, documentId }
 */
router.post('/', upload.single('file'), async (req, res) => {
  // Track the on-disk path outside the try block so the catch handler can clean up.
  // (multer memory storage never sets req.file.path, so it can't be used for this.)
  let savedFilePath = null;

  try {
    const file = req.file;
    const { title, documentType, organizationId, entityId, componentId, subEntityId } = req.body;

    // TODO: Authentication middleware should provide req.user
    const userId = req.user?.id || 'test-user-id'; // Temporary for testing

    // Validate required fields
    if (!file) {
      return res.status(400).json({ error: 'No file uploaded' });
    }

    if (!title || !documentType || !organizationId) {
      return res.status(400).json({
        error: 'Missing required fields: title, documentType, organizationId'
      });
    }

    // Validate file safety
    const validation = await validateFile(file);
    if (!validation.valid) {
      return res.status(400).json({ error: validation.error });
    }

    // Generate UUIDs
    const documentId = uuidv4();
    const jobId = uuidv4();

    // Calculate file hash (SHA256) for deduplication
    const fileHash = crypto
      .createHash('sha256')
      .update(file.buffer)
      .digest('hex');

    // Sanitize filename
    const sanitizedFilename = sanitizeFilename(file.originalname);
    const fileExt = path.extname(sanitizedFilename);
    const storedFilename = `${documentId}${fileExt}`;
    const filePath = join(UPLOAD_DIR, storedFilename);

    // Save file to disk
    await fs.writeFile(filePath, file.buffer);
    savedFilePath = filePath;

    // Get database connection
    const db = getDb();

    // Check for duplicate file hash (optional deduplication)
    const duplicateCheck = db.prepare(
      'SELECT id, title, file_path FROM documents WHERE file_hash = ? AND organization_id = ? AND status != ?'
    ).get(fileHash, organizationId, 'deleted');

    if (duplicateCheck) {
      // File already exists - optionally return existing document
      // For now, we'll allow duplicates but log it
      console.log(`Duplicate file detected: ${duplicateCheck.id}, proceeding with new upload`);
    }

    const timestamp = Date.now();

    // Insert document record
    const insertDocument = db.prepare(`
      INSERT INTO documents (
        id, organization_id, entity_id, sub_entity_id, component_id, uploaded_by,
        title, document_type, file_path, file_name, file_size, file_hash, mime_type,
        status, created_at, updated_at
      ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    `);

    insertDocument.run(
      documentId,
      organizationId,
      entityId || null,
      subEntityId || null,
      componentId || null,
      userId,
      title,
      documentType,
      filePath,
      sanitizedFilename,
      file.size,
      fileHash,
      'application/pdf',
      'processing',
      timestamp,
      timestamp
    );

    // Insert OCR job record
    const insertJob = db.prepare(`
      INSERT INTO ocr_jobs (
        id, document_id, status, progress, created_at
      ) VALUES (?, ?, ?, ?, ?)
    `);

    insertJob.run(
      jobId,
      documentId,
      'pending',
      0,
      timestamp
    );

    // Queue OCR job
    await addOcrJob(documentId, jobId, {
      filePath,
      fileName: sanitizedFilename,
      organizationId,
      userId
    });

    // Return success response
    res.status(201).json({
      jobId,
      documentId,
      message: 'File uploaded successfully and queued for processing'
    });

  } catch (error) {
    console.error('Upload error:', error);

    // Clean up the file if it was already written to disk
    if (savedFilePath) {
      try {
        await fs.unlink(savedFilePath);
      } catch (unlinkError) {
        console.error('Error cleaning up file:', unlinkError);
      }
    }

    res.status(500).json({
      error: 'Upload failed',
      message: error.message
    });
  }
});

export default router;
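For reference, a client call to this endpoint might look like the following. The field names follow the `@body` docs above; the title and organization UUID are placeholders.

```js
// Hypothetical client-side usage of POST /api/upload (values are placeholders).
const form = new FormData();
form.append('file', pdfFile);            // a File from <input type="file"> or drag-and-drop
form.append('title', 'Owner Manual');
form.append('documentType', 'owner-manual');
form.append('organizationId', '00000000-0000-0000-0000-000000000000');

const res = await fetch('/api/upload', { method: 'POST', body: form });
const { jobId, documentId } = await res.json();
// Poll job progress with jobId (e.g. via the jobs route / useJobPolling composable).
```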