Architecture:
- database-schema.sql: Future-proof SQLite schema with Postgres migration path
- meilisearch-config.json: Search index config with boat terminology synonyms
- hardened-production-guide.md: Security hardening (queues, file safety, tenant tokens)

Roadmap:
- v1.0-mvp.md: Feature roadmap and success criteria
- 2-week-launch-plan.md: Day-by-day execution plan with deliverables

Debates:
- 01-schema-and-vertical-analysis.md: Expert panel consensus on architecture

Key Decisions:
- Hybrid SQLite + Meilisearch architecture
- Search-first design (Meilisearch as query layer)
- Multi-vertical support (boats, marinas, properties)
- Offline-first PWA approach
- Tenant token security (never expose master key)
- Background queue for OCR processing
- File safety pipeline (qpdf + ClamAV)
Hardened Tech Stack - Production-Ready Improvements
🚨 Critical Fixes Applied
Based on expert panel review, these are the must-fix items before launch.
1. Background Processing Architecture
Problem:
OCR/PDF processing will spike CPU/RAM on shared hosting and murder request latency.
Solution: Job Queue System
Option A: BullMQ + Redis (Recommended)
// server/queue/index.js
const { Queue, Worker } = require('bullmq');
const Redis = require('ioredis');
const connection = new Redis({
host: 'localhost',
port: 6379,
maxRetriesPerRequest: null
});
// Create queue
const ocrQueue = new Queue('ocr-processing', { connection });
// Add job (from upload endpoint)
async function queueOCR(fileData) {
const job = await ocrQueue.add('process-pdf', {
filePath: fileData.filePath, // match the payload the upload route sends (it passes filePath/docId, not path/id)
docId: fileData.docId,
boatId: fileData.boatId,
pages: fileData.pages
}, {
attempts: 3,
backoff: {
type: 'exponential',
delay: 2000
}
});
return job.id;
}
// Worker — keep this in its own entry point (server/workers/ocr-worker.js,
// matching the systemd unit below). Requiring this whole module from the web
// server would start the worker in-process and OCR would still block requests.
const worker = new Worker('ocr-processing', async job => {
const { filePath, docId, boatId } = job.data;
// Update job progress
await job.updateProgress(10);
// Extract text with OCR
const text = await extractTextWithOCR(filePath);
await job.updateProgress(50);
// Index in Meilisearch
await indexDocument({ docId, boatId, text });
await job.updateProgress(100);
return { docId, chars: text.length }; // text.length is characters, not pages — page count comes from the safety pipeline
}, { connection });
worker.on('completed', job => {
console.log(`Job ${job.id} completed`);
});
worker.on('failed', (job, err) => {
console.error(`Job ${job.id} failed:`, err);
});
module.exports = { queueOCR, ocrQueue };
Option B: SQLite Queue (No Redis dependency)
// server/queue/sqlite-queue.js
const Database = require('better-sqlite3');
const db = new Database('./data/queue.db');
db.exec(`
CREATE TABLE IF NOT EXISTS jobs (
id INTEGER PRIMARY KEY AUTOINCREMENT,
type TEXT NOT NULL,
payload TEXT NOT NULL,
status TEXT DEFAULT 'pending',
attempts INTEGER DEFAULT 0,
max_attempts INTEGER DEFAULT 3,
error TEXT,
created_at INTEGER DEFAULT (unixepoch()),
updated_at INTEGER DEFAULT (unixepoch())
)
`);
class SQLiteQueue {
enqueue(type, payload) {
const stmt = db.prepare(`
INSERT INTO jobs (type, payload) VALUES (?, ?)
`);
const result = stmt.run(type, JSON.stringify(payload));
return result.lastInsertRowid;
}
dequeue() {
// NB: the SELECT-then-UPDATE below is not atomic — fine for the single
// worker process the systemd unit runs, but multiple workers would need
// to claim jobs inside a transaction
const job = db.prepare(`
SELECT * FROM jobs
WHERE status = 'pending' AND attempts < max_attempts
ORDER BY created_at ASC LIMIT 1
`).get();
if (!job) return null;
db.prepare(`
UPDATE jobs SET status = 'processing', attempts = attempts + 1
WHERE id = ?
`).run(job.id);
return {
...job,
payload: JSON.parse(job.payload)
};
}
complete(jobId) {
db.prepare(`UPDATE jobs SET status = 'completed' WHERE id = ?`).run(jobId);
}
fail(jobId, error) {
// Put the job back to 'pending' for retry until max_attempts is exhausted
// (dequeue already incremented attempts); otherwise a single failure would
// strand the job as 'failed' and max_attempts would never matter
db.prepare(`
UPDATE jobs SET
status = CASE WHEN attempts >= max_attempts THEN 'failed' ELSE 'pending' END,
error = ?
WHERE id = ?
`).run(error, jobId);
}
}
module.exports = new SQLiteQueue();
Worker Process (systemd service)
# ~/.config/systemd/user/ocr-worker.service
[Unit]
Description=OCR Worker for Boat Docs
[Service]
WorkingDirectory=%h/apps/boat-docs
ExecStart=/usr/bin/node server/workers/ocr-worker.js
Environment=NODE_ENV=production
Restart=always
RestartSec=10
[Install]
WantedBy=default.target
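The unit above points at server/workers/ocr-worker.js, which neither option defines. For Option B, a minimal polling consumer might look like the sketch below — the require paths for extractTextWithOCR and indexDocument are assumptions, standing in for whatever modules actually own those helpers:
// server/workers/ocr-worker.js — sketch of a consumer for the SQLite queue
const queue = require('../queue/sqlite-queue');
// Hypothetical module paths — wire these to wherever the OCR and
// indexing helpers really live
const { extractTextWithOCR } = require('../services/ocr');
const { indexDocument } = require('../services/indexing');

const POLL_INTERVAL_MS = 2000;

async function run() {
  for (;;) {
    const job = queue.dequeue();
    if (!job) {
      // Nothing pending — sleep instead of busy-looping
      await new Promise(resolve => setTimeout(resolve, POLL_INTERVAL_MS));
      continue;
    }
    try {
      const { filePath, docId, boatId } = job.payload;
      const text = await extractTextWithOCR(filePath);
      await indexDocument({ docId, boatId, text });
      queue.complete(job.id);
    } catch (err) {
      queue.fail(job.id, err.message); // re-queues until max_attempts is spent
    }
  }
}

run().catch(err => {
  console.error('Worker crashed:', err);
  process.exit(1); // systemd Restart=always brings the process back
});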
2. File Safety Pipeline
Problem:
Malicious PDFs, zip bombs, broken encodings will wreck your day.
Solution: Multi-Layer Validation
// server/middleware/file-safety.js
const { execSync } = require('child_process');
const fs = require('fs');
const path = require('path');
const FILE_LIMITS = {
maxSize: 128 * 1024 * 1024, // 128MB
maxPages: 1000,
allowedMimeTypes: ['application/pdf'],
allowedExtensions: ['.pdf']
};
async function validateUpload(file) {
const errors = [];
// 1. Extension check
const ext = path.extname(file.originalname).toLowerCase();
if (!FILE_LIMITS.allowedExtensions.includes(ext)) {
errors.push(`Invalid extension: ${ext}`);
}
// 2. MIME type check (client-supplied header — the magic-byte check below is the real test)
if (!FILE_LIMITS.allowedMimeTypes.includes(file.mimetype)) {
errors.push(`Invalid MIME type: ${file.mimetype}`);
}
// 3. File size
if (file.size > FILE_LIMITS.maxSize) {
errors.push(`File too large: ${(file.size / 1024 / 1024).toFixed(2)}MB`);
}
// 4. Magic byte check — read only the header instead of buffering
// the whole (possibly 128MB) file into memory
const fd = fs.openSync(file.path, 'r');
const header = Buffer.alloc(5);
fs.readSync(fd, header, 0, 5, 0);
fs.closeSync(fd);
if (!header.toString('utf8').startsWith('%PDF')) {
errors.push('Not a valid PDF (magic bytes)');
}
if (errors.length > 0) {
throw new Error(errors.join('; '));
}
return true;
}
async function sanitizePDF(inputPath, outputPath) {
try {
// Use qpdf to linearize and sanitize
execSync(`qpdf --linearize --newline-before-endstream "${inputPath}" "${outputPath}"`, {
timeout: 30000 // 30 second timeout
});
// Check page count
const info = execSync(`qpdf --show-npages "${outputPath}"`).toString().trim();
const pageCount = parseInt(info);
if (pageCount > FILE_LIMITS.maxPages) {
throw new Error(`Too many pages: ${pageCount}`);
}
return { sanitized: true, pages: pageCount };
} catch (err) {
throw new Error(`PDF sanitization failed: ${err.message}`);
}
}
async function scanForMalware(filePath) {
try {
// ClamAV scan
execSync(`clamscan --no-summary "${filePath}"`, {
timeout: 60000 // 1 minute timeout
});
return { clean: true };
} catch (err) {
// clamscan exit codes: 1 = virus found, 2 = scan error;
// 127 means the shell could not find the binary at all
if (err.status === 1) {
throw new Error('Malware detected');
}
if (err.status === 127) {
// ClamAV not installed — log a warning but don't fail
console.warn('ClamAV not available, skipping virus scan');
return { clean: true, skipped: true };
}
throw new Error(`Virus scan failed: ${err.message}`);
}
}
async function safetyPipeline(file) {
// Step 1: Basic validation
await validateUpload(file);
// Step 2: Sanitize with qpdf
const sanitizedPath = `${file.path}.sanitized.pdf`;
const { pages } = await sanitizePDF(file.path, sanitizedPath);
// Step 3: Malware scan
await scanForMalware(sanitizedPath);
// Step 4: Replace original with sanitized version
fs.unlinkSync(file.path);
fs.renameSync(sanitizedPath, file.path);
return { safe: true, pages };
}
module.exports = { safetyPipeline, validateUpload };
Express route with safety
const multer = require('multer');
const { safetyPipeline } = require('./middleware/file-safety');
const { queueOCR } = require('./queue');
const upload = multer({
dest: './uploads/temp/',
limits: { fileSize: 128 * 1024 * 1024 } // mirror FILE_LIMITS.maxSize so oversize uploads die mid-stream
});
app.post('/api/upload', upload.single('manual'), async (req, res) => {
try {
// Safety pipeline
const { pages } = await safetyPipeline(req.file);
// Move to permanent storage
const docId = generateId(); // app-specific ID helper — crypto.randomUUID() works
const finalPath = `./data/boat-manuals/${docId}.pdf`;
fs.renameSync(req.file.path, finalPath);
// Queue for OCR processing
const jobId = await queueOCR({
filePath: finalPath,
docId,
boatId: req.body.boatId,
pages
});
res.json({
docId,
jobId,
status: 'processing',
pages
});
} catch (err) {
// Clean up on failure
if (req.file?.path && fs.existsSync(req.file.path)) {
fs.unlinkSync(req.file.path);
}
res.status(400).json({ error: err.message });
}
});
// Job status endpoint
app.get('/api/jobs/:jobId', async (req, res) => {
const job = await ocrQueue.getJob(req.params.jobId);
if (!job) return res.status(404).json({ error: 'Job not found' });
res.json({
id: job.id,
progress: job.progress,
state: await job.getState(),
result: job.returnvalue
});
});
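On the client, the PWA can poll this endpoint until the job settles. A minimal sketch — waitForJob is a new helper name, and the response shape is the one returned by the endpoint above:
// client/src/services/jobs.js — poll an OCR job until it completes (sketch)
async function waitForJob(jobId, { intervalMs = 2000, timeoutMs = 120000 } = {}) {
  const deadline = Date.now() + timeoutMs;
  while (Date.now() < deadline) {
    const job = await fetch(`/api/jobs/${jobId}`).then(r => r.json());
    if (job.state === 'completed') return job.result; // { docId, chars }
    if (job.state === 'failed') throw new Error(`OCR failed for job ${jobId}`);
    await new Promise(resolve => setTimeout(resolve, intervalMs));
  }
  throw new Error(`Timed out waiting for job ${jobId}`);
}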
3. Meilisearch Security
Problem:
Port 7700 exposed = public data. Master key in client code = disaster.
Solution: Tenant Tokens
// server/services/search.js
const { MeiliSearch } = require('meilisearch');
const client = new MeiliSearch({
host: 'http://localhost:7700',
apiKey: process.env.MEILISEARCH_MASTER_KEY // NEVER send to client!
});
// Generate tenant token (short-lived, scoped)
function generateTenantToken(userId, boatIds) {
const searchRules = {
'boat-manuals': {
filter: `boatId IN [${boatIds.map(id => `"${id}"`).join(', ')}]`
}
};
// Depending on your meilisearch-js version, generateTenantToken may also
// require the signing key's UID as a first argument — check the SDK docs.
// Signing with a dedicated search API key instead of the master key is safer.
const token = client.generateTenantToken(searchRules, {
apiKey: process.env.MEILISEARCH_MASTER_KEY,
expiresAt: new Date(Date.now() + 3600 * 1000) // 1 hour
});
return token;
}
// API endpoint to get search token
app.get('/api/search/token', requireAuth, async (req, res) => {
const userBoats = await getUserBoats(req.user.id);
const token = generateTenantToken(req.user.id, userBoats);
res.json({
token,
host: 'https://digital-lab.ca/search', // the /search/ reverse-proxy path below, not the bare domain
expiresIn: 3600
});
});
module.exports = { client, generateTenantToken };
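One prerequisite that is easy to miss: Meilisearch rejects searches that filter on attributes not declared filterable, and that includes the boatId filter embedded in tenant tokens. A one-time settings call covers it — index name and attribute names assumed from the examples in this guide:
// e.g. in a one-off setup script, using the same admin client as above
async function configureIndex() {
  await client.index('boat-manuals').updateSettings({
    filterableAttributes: ['boatId', 'system'], // required for token + query filters
    searchableAttributes: ['title', 'text']
  });
}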
Frontend usage (safe)
// client/src/services/search.js
import { MeiliSearch } from 'meilisearch';

let searchClient = null;
let tokenExpiry = 0;

async function getSearchClient() {
// Re-fetch the tenant token when the cached one is about to expire
if (!searchClient || Date.now() >= tokenExpiry) {
const { token, host, expiresIn } = await fetch('/api/search/token').then(r => r.json());
tokenExpiry = Date.now() + (expiresIn - 60) * 1000; // renew a minute early
searchClient = new MeiliSearch({
host,
apiKey: token // Scoped, time-limited token
});
}
return searchClient;
}
async function searchManuals(query) {
const client = await getSearchClient();
const index = client.index('boat-manuals');
const results = await index.search(query, {
filter: 'system = "electrical"', // evaluated server-side, ANDed with the boatId filter baked into the token
attributesToHighlight: ['text', 'title']
});
return results;
}
Nginx reverse proxy (for Meilisearch)
# /etc/nginx/sites-available/digital-lab.ca
location /search/ {
proxy_pass http://localhost:7700/;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
# Only allow POST. Search is POST; note that write/admin endpoints are
# also POST, but those still require the admin/master key, which never
# leaves the server — the tenant token is the real guard here.
limit_except POST {
deny all;
}
}
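To sanity-check the proxy from outside, a POST search with a tenant token should return hits while anything else is denied (token value is a placeholder):
# Scoped search through the proxy — should return a JSON body with "hits"
curl -s -X POST https://digital-lab.ca/search/indexes/boat-manuals/search \
  -H 'Authorization: Bearer <tenant-token>' \
  -H 'Content-Type: application/json' \
  -d '{"q":"bilge pump"}'

# Any non-POST method should get a 403 from limit_except
curl -i https://digital-lab.ca/search/indexes/boat-manuals/search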
4. Backup Validation Script
Problem:
Everyone has backups. Few have restores.
Solution: Automated Restore Testing
#!/bin/bash
# ~/bin/validate-backups
set -e
BACKUP_DIR=~/backups
TEST_DIR=/tmp/restore-test-$(date +%s)
LOG_FILE=~/logs/backup-validation.log
echo "[$(date)] Starting backup validation" | tee -a "$LOG_FILE"
# Create test directory
mkdir -p "$TEST_DIR"
cd "$TEST_DIR"
# 1. Restore SQLite databases
echo "Testing SQLite restore..." | tee -a "$LOG_FILE"
LATEST_DB=$(ls -t "$BACKUP_DIR"/gitea-backup-*.tar.gz | head -1)
tar -xzf "$LATEST_DB" gitea/data/gitea.db
# sqlite3 exits 0 even when integrity_check reports corruption,
# so compare the output against "ok" explicitly
RESULT=$(sqlite3 gitea/data/gitea.db "PRAGMA integrity_check;")
if [ "$RESULT" != "ok" ]; then
echo "ERROR: SQLite integrity check failed: $RESULT" | tee -a "$LOG_FILE"
exit 1
fi
echo "✓ SQLite database intact" | tee -a "$LOG_FILE"
# 2. Restore and test Meilisearch dump
echo "Testing Meilisearch restore..." | tee -a "$LOG_FILE"
LATEST_MEILI=$(ls -t "$BACKUP_DIR"/meilisearch-*.dump | head -1)
# Start temporary Meilisearch instance
/tmp/meilisearch --db-path "$TEST_DIR/meili-test" --import-dump "$LATEST_MEILI" --http-addr localhost:7777 &
MEILI_PID=$!
sleep 5 # bump this if the dump takes more than a few seconds to import
# Test search works
# Meilisearch rejects search bodies without an explicit JSON content type
SEARCH_RESULT=$(curl -s http://localhost:7777/indexes/boat-manuals/search \
-H 'Content-Type: application/json' -d '{"q":"test"}')
if echo "$SEARCH_RESULT" | grep -q "hits"; then
echo "✓ Meilisearch restore successful" | tee -a "$LOG_FILE"
else
echo "ERROR: Meilisearch search failed" | tee -a "$LOG_FILE"
kill $MEILI_PID
exit 1
fi
kill $MEILI_PID
# 3. Verify file backups
echo "Testing file restore..." | tee -a "$LOG_FILE"
SAMPLE_FILES=$(find "$BACKUP_DIR/boat-manuals" -type f | head -10)
# echo | wc -l reports 1 even for empty input, so test for an empty
# string before counting
if [ -z "$SAMPLE_FILES" ]; then
echo "ERROR: No backup files found" | tee -a "$LOG_FILE"
exit 1
fi
FILE_COUNT=$(echo "$SAMPLE_FILES" | wc -l)
echo "✓ Found $FILE_COUNT sample files" | tee -a "$LOG_FILE"
# 4. Test rclone remote
echo "Testing off-box backup..." | tee -a "$LOG_FILE"
# A pipe through head would mask rclone's exit status, so capture first
if ! RCLONE_OUT=$(rclone ls "b2:boatvault-backups/$(date +%Y-%m)"); then
echo "ERROR: Off-box backup unreachable" | tee -a "$LOG_FILE"
exit 1
fi
echo "$RCLONE_OUT" | head -5
echo "✓ Off-box backup accessible" | tee -a "$LOG_FILE"
# Cleanup
cd /
rm -rf "$TEST_DIR"
echo "[$(date)] ✅ All backup validation tests passed" | tee -a "$LOG_FILE"
# Send success notification (optional)
curl -X POST https://digital-lab.ca/api/notifications \
-H "Content-Type: application/json" \
-d '{"type":"backup-validation","status":"success"}' || true
Cron job for monthly validation
# crontab -e
0 3 1 * * /home/user/bin/validate-backups
5. Systemd Health Checks
// server/routes/health.js
const express = require('express');
const router = express.Router();
const { client: meilisearch } = require('../services/search');
const db = require('../services/database');
router.get('/health', async (req, res) => {
const checks = {
app: 'ok',
database: 'unknown',
search: 'unknown',
queue: 'unknown'
};
let healthy = true;
// Check database
try {
db.prepare('SELECT 1').get();
checks.database = 'ok';
} catch (err) {
checks.database = 'error';
healthy = false;
}
// Check Meilisearch
try {
await meilisearch.health();
checks.search = 'ok';
} catch (err) {
checks.search = 'error';
healthy = false;
}
// Check queue (if using Redis) — reuse the shared queue rather than
// opening a new Redis connection on every health check
try {
const { ocrQueue } = require('../queue');
await ocrQueue.isPaused();
checks.queue = 'ok';
} catch (err) {
checks.queue = 'error';
healthy = false;
}
res.status(healthy ? 200 : 503).json({
status: healthy ? 'healthy' : 'degraded',
checks,
timestamp: new Date().toISOString()
});
});
module.exports = router;
Monitoring with systemd
# ~/.config/systemd/user/boat-docs-healthcheck.service
[Unit]
Description=Boat Docs Health Check
[Service]
Type=oneshot
ExecStart=/usr/bin/curl -f http://localhost:8080/health
# ~/.config/systemd/user/boat-docs-healthcheck.timer
[Unit]
Description=Run boat-docs health check every 5 minutes
[Timer]
OnBootSec=5min
OnUnitActiveSec=5min
[Install]
WantedBy=timers.target
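Activating the timer is a few commands — note that on shared hosting, user units only keep running after logout if lingering is enabled (which may or may not be allowed on your host):
# Allow user units to outlive the login session (if the host permits it)
loginctl enable-linger "$USER"

# Load and start the timer
systemctl --user daemon-reload
systemctl --user enable --now boat-docs-healthcheck.timer

# curl -f exits non-zero on a 503, so failed checks land in the journal
systemctl --user list-timers
journalctl --user -u boat-docs-healthcheck.service --since today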
6. Security Headers & Rate Limiting
// server/middleware/security.js
const helmet = require('helmet');
const rateLimit = require('express-rate-limit');
// Helmet configuration
const securityHeaders = helmet({
contentSecurityPolicy: {
directives: {
defaultSrc: ["'self'"],
scriptSrc: ["'self'"],
styleSrc: ["'self'", "'unsafe-inline'"], // Tailwind might need this
imgSrc: ["'self'", "data:", "https:"],
connectSrc: ["'self'", "https://digital-lab.ca"],
fontSrc: ["'self'"],
objectSrc: ["'none'"],
mediaSrc: ["'self'"],
frameSrc: ["'none'"],
frameAncestors: ["'none'"]
}
},
hsts: {
maxAge: 31536000,
includeSubDomains: true,
preload: true
}
});
// Rate limiters
const apiLimiter = rateLimit({
windowMs: 15 * 60 * 1000, // 15 minutes
max: 100, // 100 requests per window
message: 'Too many requests, please try again later'
});
const uploadLimiter = rateLimit({
windowMs: 60 * 60 * 1000, // 1 hour
max: 10, // 10 uploads per hour
message: 'Upload limit exceeded'
});
const searchLimiter = rateLimit({
windowMs: 1 * 60 * 1000, // 1 minute
max: 30, // 30 searches per minute
message: 'Search rate limit exceeded'
});
module.exports = {
securityHeaders,
apiLimiter,
uploadLimiter,
searchLimiter
};
Apply in Express
const { securityHeaders, apiLimiter, uploadLimiter, searchLimiter } = require('./middleware/security');
// Global security
app.set('trust proxy', 1); // behind nginx — lets express-rate-limit see real client IPs
app.use(securityHeaders);
// Per-route rate limiting
app.use('/api/', apiLimiter);
app.post('/api/upload', uploadLimiter, uploadHandler);
app.post('/api/search', searchLimiter, searchHandler);
7. Gitea Upgrade Procedure
#!/bin/bash
# ~/bin/upgrade-gitea
set -e
GITEA_VERSION="1.24.0"
GITEA_BINARY="/tmp/gitea" # adjust to wherever the running binary actually lives; /tmp is wiped on reboot
BACKUP_DIR=~/backups/gitea-pre-upgrade-$(date +%Y%m%d-%H%M%S)
echo "Upgrading Gitea to $GITEA_VERSION"
# 1. Stop Gitea
echo "Stopping Gitea..."
systemctl --user stop gitea.service || ssh stackcp "systemctl --user stop gitea.service"
# 2. Backup current version
echo "Creating backup..."
mkdir -p "$BACKUP_DIR"
cp -r ~/gitea "$BACKUP_DIR/"
cp "$GITEA_BINARY" "$BACKUP_DIR/gitea.old"
# 3. Download new version
echo "Downloading Gitea $GITEA_VERSION..."
curl -fsSL "https://dl.gitea.com/gitea/$GITEA_VERSION/gitea-$GITEA_VERSION-linux-amd64" -o "$GITEA_BINARY.new"
chmod 755 "$GITEA_BINARY.new"
# 4. Test new binary
echo "Testing new binary..."
"$GITEA_BINARY.new" --version
# 5. Replace binary
mv "$GITEA_BINARY" "$GITEA_BINARY.old"
mv "$GITEA_BINARY.new" "$GITEA_BINARY"
# 6. Start Gitea
echo "Starting Gitea..."
systemctl --user start gitea.service || ssh stackcp "systemctl --user start gitea.service"
# 7. Verify
sleep 5
if curl -f http://localhost:4000/ > /dev/null 2>&1; then
echo "✅ Gitea upgrade successful to $GITEA_VERSION"
"$GITEA_BINARY" --version
else
echo "❌ Gitea failed to start, rolling back..."
mv "$GITEA_BINARY.old" "$GITEA_BINARY"
systemctl --user start gitea.service
exit 1
fi
Summary: Production Hardening Checklist
- Background queue for OCR (BullMQ or SQLite)
- File safety pipeline (qpdf, ClamAV, validation)
- Meilisearch tenant tokens (never expose master key)
- Backup validation script (monthly restore tests)
- Health check endpoints + monitoring
- Security headers (helmet, CSP, HSTS)
- Rate limiting (upload, search, API)
- Gitea 1.24.0 upgrade
- logrotate for application logs (see the sketch below)
- systemd Restart=on-failure for all services
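The logrotate item has no config elsewhere in this guide; a minimal sketch, with assumed log paths — copytruncate avoids having to teach the Node process to reopen its log file:
# /etc/logrotate.d/boat-docs (or a user-level logrotate config on shared hosting)
/home/user/apps/boat-docs/logs/*.log {
    weekly
    rotate 8
    compress
    delaycompress
    missingok
    notifempty
    copytruncate
}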
Deploy these before showing BoatVault to real users.