navidocs/server/routes/upload.js
ggq-admin 155a8c0305 feat: NaviDocs MVP - Complete codebase extraction from lilian1
## Backend (server/)
- Express 5 API with security middleware (helmet, rate limiting)
- SQLite database with WAL mode (schema from docs/architecture/)
- Meilisearch integration with tenant tokens
- BullMQ + Redis background job queue
- OCR pipeline with Tesseract.js
- File safety validation (extension, MIME, size)
- 4 API route modules: upload, jobs, search, documents

## Frontend (client/)
- Vue 3 with Composition API (<script setup>)
- Vite 5 build system with HMR
- Tailwind CSS (Meilisearch-inspired design)
- UploadModal with drag-and-drop
- FigureZoom component (ported from lilian1)
- Meilisearch search integration with tenant tokens
- Job polling composable
- Clean SVG icons (no emojis)

## Code Extraction
-  manuals.js → UploadModal.vue, useJobPolling.js
-  figure-zoom.js → FigureZoom.vue
-  service-worker.js → client/public/service-worker.js (TODO)
-  glossary.json → Merged into Meilisearch synonyms
-  Discarded: quiz.js, persona.js, gamification.js (Frank-AI junk)

## Documentation
- Complete extraction plan in docs/analysis/
- README with quick start guide
- Architecture summary in docs/architecture/

## Build Status
- Server dependencies:  Installed (234 packages)
- Client dependencies:  Installed (160 packages)
- Client build:  Successful (2.63s)

🤖 Generated with Claude Code

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-19 01:55:44 +02:00

184 lines
5.1 KiB
JavaScript

/**
* Upload Route - POST /api/upload
* Handles PDF file uploads with validation, storage, and OCR queue processing
*/
import express from 'express';
import multer from 'multer';
import { v4 as uuidv4 } from 'uuid';
import crypto from 'crypto';
import fs from 'fs/promises';
import path from 'path';
import { fileURLToPath } from 'url';
import { dirname, join } from 'path';
import { getDb } from '../db/db.js';
import { validateFile, sanitizeFilename } from '../services/file-safety.js';
import { addOcrJob } from '../services/queue.js';
// Directory containing this module (ES modules do not define __dirname).
const __dirname = dirname(fileURLToPath(import.meta.url));

const router = express.Router();

// Fallback upload size limit in bytes (50MB), used when MAX_FILE_SIZE is
// unset, empty, non-numeric, or negative.
const DEFAULT_MAX_FILE_SIZE = 52428800;

// Parse with an explicit radix. A non-numeric MAX_FILE_SIZE would otherwise
// become NaN, and NaN compares false against every size, so it could not act
// as a limit while uploads are buffered in memory.
const parsedMaxFileSize = Number.parseInt(process.env.MAX_FILE_SIZE || '', 10);
const maxFileSize =
  Number.isInteger(parsedMaxFileSize) && parsedMaxFileSize >= 0
    ? parsedMaxFileSize
    : DEFAULT_MAX_FILE_SIZE;

// Configure multer for memory storage (we'll validate before saving)
const upload = multer({
  storage: multer.memoryStorage(),
  limits: {
    fileSize: maxFileSize,
  },
});

// Where accepted uploads are written; overridable through the environment.
const UPLOAD_DIR = process.env.UPLOAD_DIR || join(__dirname, '../../uploads');

// Ensure upload directory exists (runs once, at module load)
await fs.mkdir(UPLOAD_DIR, { recursive: true });
/**
 * POST /api/upload
 * Upload PDF file and queue for OCR processing.
 *
 * Steps: validate the request, write the file under UPLOAD_DIR, insert a
 * `documents` row and an `ocr_jobs` row, then enqueue the OCR job. If any step
 * throws, the written file and the inserted rows are removed on a best-effort
 * basis before the 500 response is sent.
 *
 * @body {File} file - PDF file to upload
 * @body {string} title - Document title
 * @body {string} documentType - Document type (owner-manual, component-manual, etc)
 * @body {string} organizationId - Organization UUID
 * @body {string} [entityId] - Optional entity UUID
 * @body {string} [subEntityId] - Optional sub-entity identifier
 * @body {string} [componentId] - Optional component UUID
 *
 * @returns {Object} 201 { jobId, documentId, message } on success,
 *   400 { error } on validation failure, 500 { error, message } otherwise
 */
router.post('/', upload.single('file'), async (req, res) => {
  // Side effects performed so far; the catch block uses these to undo them.
  let savedFilePath = null;
  let insertedDocumentId = null;
  let db = null;

  try {
    const file = req.file;
    // req.body may be undefined when the request was not multipart; default
    // to an empty object so the request is rejected with a 400 below instead
    // of throwing on destructuring.
    const { title, documentType, organizationId, entityId, componentId, subEntityId } =
      req.body ?? {};

    // TODO: Authentication middleware should provide req.user
    const userId = req.user?.id || 'test-user-id'; // Temporary for testing

    // Validate required fields
    if (!file) {
      return res.status(400).json({ error: 'No file uploaded' });
    }

    if (!title || !documentType || !organizationId) {
      return res.status(400).json({
        error: 'Missing required fields: title, documentType, organizationId'
      });
    }

    // Validate file safety
    const validation = await validateFile(file);
    if (!validation.valid) {
      return res.status(400).json({ error: validation.error });
    }

    // Generate UUIDs
    const documentId = uuidv4();
    const jobId = uuidv4();

    // Calculate file hash (SHA256) for deduplication
    const fileHash = crypto
      .createHash('sha256')
      .update(file.buffer)
      .digest('hex');

    // The stored name is the document id plus the sanitized extension, so
    // the client-supplied name never becomes part of the on-disk path.
    const sanitizedFilename = sanitizeFilename(file.originalname);
    const fileExt = path.extname(sanitizedFilename);
    const storedFilename = `${documentId}${fileExt}`;
    const filePath = join(UPLOAD_DIR, storedFilename);

    // Record the path before writing so a partially written file is also
    // removed if the write itself fails.
    savedFilePath = filePath;
    await fs.writeFile(filePath, file.buffer);

    // Get database connection
    db = getDb();

    // Check for duplicate file hash (optional deduplication)
    const duplicateCheck = db.prepare(
      'SELECT id, title, file_path FROM documents WHERE file_hash = ? AND organization_id = ? AND status != ?'
    ).get(fileHash, organizationId, 'deleted');

    if (duplicateCheck) {
      // File already exists - optionally return existing document
      // For now, we'll allow duplicates but log it
      console.log(`Duplicate file detected: ${duplicateCheck.id}, proceeding with new upload`);
    }

    const timestamp = Date.now();

    // Insert document record
    const insertDocument = db.prepare(`
      INSERT INTO documents (
        id, organization_id, entity_id, sub_entity_id, component_id, uploaded_by,
        title, document_type, file_path, file_name, file_size, file_hash, mime_type,
        status, created_at, updated_at
      ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    `);

    insertDocument.run(
      documentId,
      organizationId,
      entityId || null,
      subEntityId || null,
      componentId || null,
      userId,
      title,
      documentType,
      filePath,
      sanitizedFilename,
      file.size,
      fileHash,
      'application/pdf',
      'processing',
      timestamp,
      timestamp
    );
    insertedDocumentId = documentId;

    // Insert OCR job record
    const insertJob = db.prepare(`
      INSERT INTO ocr_jobs (
        id, document_id, status, progress, created_at
      ) VALUES (?, ?, ?, ?, ?)
    `);

    insertJob.run(
      jobId,
      documentId,
      'pending',
      0,
      timestamp
    );

    // Queue OCR job
    await addOcrJob(documentId, jobId, {
      filePath,
      fileName: sanitizedFilename,
      organizationId,
      userId
    });

    // The upload is committed from here on: the queued job needs the file
    // and rows, so an error while sending the response must not remove them.
    savedFilePath = null;
    insertedDocumentId = null;

    // Return success response
    res.status(201).json({
      jobId,
      documentId,
      message: 'File uploaded successfully and queued for processing'
    });
  } catch (error) {
    console.error('Upload error:', error);

    // Remove rows inserted for this upload so no document is left in
    // 'processing' pointing at a file that is about to be deleted.
    // The job row goes first because it references the document.
    if (insertedDocumentId) {
      try {
        db.prepare('DELETE FROM ocr_jobs WHERE document_id = ?').run(insertedDocumentId);
        db.prepare('DELETE FROM documents WHERE id = ?').run(insertedDocumentId);
      } catch (rollbackError) {
        console.error('Error rolling back upload records:', rollbackError);
      }
    }

    // Clean up the file this handler wrote. multer's memory storage does not
    // set req.file.path, so the path has to be tracked here.
    if (savedFilePath) {
      try {
        await fs.unlink(savedFilePath);
      } catch (unlinkError) {
        // A missing file just means the write never created it.
        if (unlinkError.code !== 'ENOENT') {
          console.error('Error cleaning up file:', unlinkError);
        }
      }
    }

    // Do not attempt a second response if one was already started.
    if (!res.headersSent) {
      res.status(500).json({
        error: 'Upload failed',
        message: error.message
      });
    }
  }
});
export default router;