Hardened Tech Stack - Production-Ready Improvements

🚨 Critical Fixes Applied

Based on expert panel review, these are the must-fix items before launch.


1. Background Processing Architecture

Problem:

OCR/PDF processing will spike CPU/RAM on shared hosting and murder request latency.

Solution: Job Queue System

Option A: BullMQ + Redis (Recommended)

// server/queue/index.js
const { Queue, Worker } = require('bullmq');
const Redis = require('ioredis');

const connection = new Redis({
  host: 'localhost',
  port: 6379,
  maxRetriesPerRequest: null // required by BullMQ for blocking connections
});

// Create queue
const ocrQueue = new Queue('ocr-processing', { connection });

// Add job (from upload endpoint)
async function queueOCR({ filePath, docId, boatId }) {
  const job = await ocrQueue.add('process-pdf', {
    filePath,
    docId,
    boatId
  }, {
    attempts: 3,
    backoff: {
      type: 'exponential',
      delay: 2000
    }
  });
  return job.id;
}

// Worker (separate process)
const worker = new Worker('ocr-processing', async job => {
  const { filePath, docId, boatId } = job.data;
  
  // Update job progress
  await job.updateProgress(10);
  
  // Extract text with OCR
  const text = await extractTextWithOCR(filePath);
  await job.updateProgress(50);
  
  // Index in Meilisearch
  await indexDocument({ docId, boatId, text });
  await job.updateProgress(100);
  
  return { docId, pages: text.length };
}, { connection });

worker.on('completed', job => {
  console.log(`Job ${job.id} completed`);
});

worker.on('failed', (job, err) => {
  console.error(`Job ${job.id} failed:`, err);
});

module.exports = { queueOCR, ocrQueue };
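
One production detail worth adding before running the worker under systemd (below): close it cleanly on SIGTERM so the active job can finish instead of being killed mid-OCR. A minimal sketch using BullMQ's close():

// Graceful shutdown: worker.close() waits for the job in progress to finish
process.on('SIGTERM', async () => {
  await worker.close();
  await connection.quit();
  process.exit(0);
});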

Option B: SQLite Queue (No Redis dependency)

// server/queue/sqlite-queue.js
const Database = require('better-sqlite3');
const db = new Database('./data/queue.db');

db.exec(`
  CREATE TABLE IF NOT EXISTS jobs (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    type TEXT NOT NULL,
    payload TEXT NOT NULL,
    status TEXT DEFAULT 'pending',
    attempts INTEGER DEFAULT 0,
    max_attempts INTEGER DEFAULT 3,
    error TEXT,
    created_at INTEGER DEFAULT (unixepoch()),
    updated_at INTEGER DEFAULT (unixepoch())
  )
`);

class SQLiteQueue {
  enqueue(type, payload) {
    const stmt = db.prepare(`
      INSERT INTO jobs (type, payload) VALUES (?, ?)
    `);
    const result = stmt.run(type, JSON.stringify(payload));
    return result.lastInsertRowid;
  }
  
  dequeue() {
    // SELECT-then-UPDATE is safe here because better-sqlite3 is synchronous
    // and a single worker process is assumed
    const job = db.prepare(`
      SELECT * FROM jobs
      WHERE status = 'pending' AND attempts < max_attempts
      ORDER BY created_at ASC LIMIT 1
    `).get();
    
    if (!job) return null;
    
    db.prepare(`
      UPDATE jobs SET status = 'processing', attempts = attempts + 1
      WHERE id = ?
    `).run(job.id);
    
    return {
      ...job,
      payload: JSON.parse(job.payload)
    };
  }
  
  complete(jobId) {
    db.prepare(`UPDATE jobs SET status = 'completed' WHERE id = ?`).run(jobId);
  }
  
  fail(jobId, error) {
    // Return the job to 'pending' for another attempt until max_attempts
    // is exhausted, then mark it permanently failed
    db.prepare(`
      UPDATE jobs SET
        status = CASE WHEN attempts < max_attempts THEN 'pending' ELSE 'failed' END,
        error = ?,
        updated_at = unixepoch()
      WHERE id = ?
    `).run(error, jobId);
  }
}

module.exports = new SQLiteQueue();
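
Worker loop (SQLite option)

The systemd unit below runs server/workers/ocr-worker.js, which is not shown above for the SQLite option. A minimal polling sketch, assuming the same extractTextWithOCR and indexDocument helpers as Option A and an arbitrary 2-second poll interval:

// server/workers/ocr-worker.js
const queue = require('../queue/sqlite-queue');

async function handleJob(job) {
  const { filePath, docId, boatId } = job.payload;
  const text = await extractTextWithOCR(filePath);
  await indexDocument({ docId, boatId, text });
}

async function run() {
  // Poll for pending jobs; dequeue() already marks them 'processing'
  while (true) {
    const job = queue.dequeue();
    if (!job) {
      await new Promise(resolve => setTimeout(resolve, 2000));
      continue;
    }
    try {
      await handleJob(job);
      queue.complete(job.id);
    } catch (err) {
      queue.fail(job.id, err.message);
    }
  }
}

run();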

Worker Process (systemd service)

# ~/.config/systemd/user/ocr-worker.service
[Unit]
Description=OCR Worker for Boat Docs

[Service]
WorkingDirectory=%h/apps/boat-docs
ExecStart=/usr/bin/node server/workers/ocr-worker.js
Environment=NODE_ENV=production
Restart=always
RestartSec=10

[Install]
WantedBy=default.target

2. File Safety Pipeline

Problem:

Malicious PDFs, zip bombs, broken encodings will wreck your day.

Solution: Multi-Layer Validation

// server/middleware/file-safety.js
const { execSync } = require('child_process');
const fs = require('fs');
const path = require('path');

const FILE_LIMITS = {
  maxSize: 128 * 1024 * 1024, // 128MB
  maxPages: 1000,
  allowedMimeTypes: ['application/pdf'],
  allowedExtensions: ['.pdf']
};

async function validateUpload(file) {
  const errors = [];
  
  // 1. Extension check
  const ext = path.extname(file.originalname).toLowerCase();
  if (!FILE_LIMITS.allowedExtensions.includes(ext)) {
    errors.push(`Invalid extension: ${ext}`);
  }
  
  // 2. MIME type check
  if (!FILE_LIMITS.allowedMimeTypes.includes(file.mimetype)) {
    errors.push(`Invalid MIME type: ${file.mimetype}`);
  }
  
  // 3. File size
  if (file.size > FILE_LIMITS.maxSize) {
    errors.push(`File too large: ${(file.size / 1024 / 1024).toFixed(2)}MB`);
  }
  
  // 4. Magic byte check (read only the header, not the whole file)
  const header = Buffer.alloc(5);
  const fd = fs.openSync(file.path, 'r');
  fs.readSync(fd, header, 0, 5, 0);
  fs.closeSync(fd);
  if (!header.toString('utf8').startsWith('%PDF-')) {
    errors.push('Not a valid PDF (magic bytes)');
  }
  
  if (errors.length > 0) {
    throw new Error(errors.join('; '));
  }
  
  return true;
}

async function sanitizePDF(inputPath, outputPath) {
  try {
    // Use qpdf to linearize and sanitize
    execSync(`qpdf --linearize --newline-before-endstream "${inputPath}" "${outputPath}"`, {
      timeout: 30000 // 30 second timeout
    });
    
    // Check page count
    const info = execSync(`qpdf --show-npages "${outputPath}"`).toString().trim();
    const pageCount = parseInt(info, 10);
    
    if (pageCount > FILE_LIMITS.maxPages) {
      throw new Error(`Too many pages: ${pageCount}`);
    }
    
    return { sanitized: true, pages: pageCount };
  } catch (err) {
    throw new Error(`PDF sanitization failed: ${err.message}`);
  }
}

async function scanForMalware(filePath) {
  try {
    // ClamAV scan (clamscan exits 0 = clean, 1 = virus found, 2 = error)
    execSync(`clamscan --no-summary "${filePath}"`, {
      timeout: 60000 // 1 minute timeout
    });
    return { clean: true };
  } catch (err) {
    if (err.status === 1) {
      throw new Error('Malware detected');
    }
    // clamscan missing or scan error - log a warning but don't block the upload
    console.warn('ClamAV scan unavailable, skipping virus scan:', err.message);
    return { clean: true, skipped: true };
  }
}

async function safetyPipeline(file) {
  // Step 1: Basic validation
  await validateUpload(file);
  
  // Step 2: Sanitize with qpdf
  const sanitizedPath = `${file.path}.sanitized.pdf`;
  const { pages } = await sanitizePDF(file.path, sanitizedPath);
  
  // Step 3: Malware scan
  await scanForMalware(sanitizedPath);
  
  // Step 4: Replace original with sanitized version
  fs.unlinkSync(file.path);
  fs.renameSync(sanitizedPath, file.path);
  
  return { safe: true, pages };
}

module.exports = { safetyPipeline, validateUpload };

Express route with safety

const multer = require('multer');
const fs = require('fs');
const { safetyPipeline } = require('./middleware/file-safety');
const { queueOCR } = require('./queue');

const upload = multer({ dest: './uploads/temp/' });

app.post('/api/upload', upload.single('manual'), async (req, res) => {
  try {
    // Safety pipeline
    const { pages } = await safetyPipeline(req.file);
    
    // Move to permanent storage
    const docId = generateId();
    const finalPath = `./data/boat-manuals/${docId}.pdf`;
    fs.renameSync(req.file.path, finalPath);
    
    // Queue for OCR processing
    const jobId = await queueOCR({
      filePath: finalPath,
      docId,
      boatId: req.body.boatId,
      pages
    });
    
    res.json({ 
      docId, 
      jobId, 
      status: 'processing',
      pages 
    });
  } catch (err) {
    // Clean up on failure
    if (req.file?.path && fs.existsSync(req.file.path)) {
      fs.unlinkSync(req.file.path);
    }
    res.status(400).json({ error: err.message });
  }
});

// Job status endpoint
app.get('/api/jobs/:jobId', async (req, res) => {
  const job = await ocrQueue.getJob(req.params.jobId);
  if (!job) {
    return res.status(404).json({ error: 'Job not found' });
  }
  res.json({
    id: job.id,
    progress: job.progress,
    state: await job.getState(),
    result: job.returnvalue
  });
});
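
On the client, the returned jobId pairs with this endpoint to drive a progress UI. A minimal polling sketch (the 2-second interval is a placeholder; the state names follow BullMQ's getState values):

// client/src/services/jobs.js - poll until the OCR job settles
async function waitForJob(jobId, onProgress) {
  while (true) {
    const job = await fetch(`/api/jobs/${jobId}`).then(r => r.json());
    onProgress(job.progress);
    if (job.state === 'completed') return job.result;
    if (job.state === 'failed') throw new Error('OCR job failed');
    await new Promise(resolve => setTimeout(resolve, 2000));
  }
}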

3. Meilisearch Security

Problem:

Port 7700 exposed = public data. Master key in client code = disaster.

Solution: Tenant Tokens

// server/services/search.js
const { MeiliSearch } = require('meilisearch');

const client = new MeiliSearch({
  host: 'http://localhost:7700',
  apiKey: process.env.MEILISEARCH_MASTER_KEY // NEVER send to client!
});

// Generate tenant token (short-lived, scoped)
// Note: tenant tokens must be signed with a search-scoped API key;
// a token signed with the master key will not validate
function generateTenantToken(userId, boatIds) {
  const searchRules = {
    'boat-manuals': {
      filter: `boatId IN [${boatIds.map(id => `"${id}"`).join(', ')}]`
    }
  };

  const token = client.generateTenantToken(searchRules, {
    apiKey: process.env.MEILISEARCH_SEARCH_KEY, // the default Search API key
    expiresAt: new Date(Date.now() + 3600 * 1000) // 1 hour
  });

  return token;
}

// API endpoint to get search token
app.get('/api/search/token', requireAuth, async (req, res) => {
  const userBoats = await getUserBoats(req.user.id);
  const token = generateTenantToken(req.user.id, userBoats);
  
  res.json({
    token,
    host: 'https://digital-lab.ca/search', // through the Nginx reverse proxy below
    expiresIn: 3600
  });
});

module.exports = { client, generateTenantToken };
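
Note that the boatId filter in the search rules only works if the attribute is declared filterable on the index. meilisearch-config.json should already cover this, but as a one-time setup sketch:

// One-time index setup: any attribute used in tenant-token search rules
// or query filters must be listed in filterableAttributes
client.index('boat-manuals')
  .updateFilterableAttributes(['boatId', 'system'])
  .then(task => console.log('Settings update enqueued:', task));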

Frontend usage (safe)

// client/src/services/search.js
import { MeiliSearch } from 'meilisearch';

let searchClient = null;
let tokenExpiry = 0;

async function getSearchClient() {
  // Fetch a tenant token from the backend, and re-fetch once it expires
  if (!searchClient || Date.now() >= tokenExpiry) {
    const { token, host, expiresIn } = await fetch('/api/search/token').then(r => r.json());
    tokenExpiry = Date.now() + (expiresIn - 60) * 1000; // renew a minute early

    searchClient = new MeiliSearch({
      host,
      apiKey: token // Scoped, time-limited token
    });
  }
  return searchClient;
}

async function searchManuals(query) {
  const client = await getSearchClient();
  const index = client.index('boat-manuals');
  
  const results = await index.search(query, {
    filter: 'system = "electrical"', // example filter, ANDed with the token's boatId scope
    attributesToHighlight: ['text', 'title']
  });
  
  return results;
}

Nginx reverse proxy (for Meilisearch)

# /etc/nginx/sites-available/digital-lab.ca
location /search/ {
    proxy_pass http://localhost:7700/;
    proxy_set_header Host $host;
    proxy_set_header X-Real-IP $remote_addr;

    # Search queries are POSTs; blocking other methods cuts off GET-based
    # admin/introspection routes, and the tenant token's scope stops the
    # remaining POST routes from doing anything beyond search
    limit_except POST {
        deny all;
    }
}

4. Backup Validation Script

Problem:

Everyone has backups. Few have restores.

Solution: Automated Restore Testing

#!/bin/bash
# ~/bin/validate-backups

set -e

BACKUP_DIR=~/backups
TEST_DIR=/tmp/restore-test-$(date +%s)
LOG_FILE=~/logs/backup-validation.log

echo "[$(date)] Starting backup validation" | tee -a "$LOG_FILE"

# Create test directory
mkdir -p "$TEST_DIR"
cd "$TEST_DIR"

# 1. Restore SQLite databases
echo "Testing SQLite restore..." | tee -a "$LOG_FILE"
LATEST_DB=$(ls -t "$BACKUP_DIR"/gitea-backup-*.tar.gz | head -1)

tar -xzf "$LATEST_DB" gitea/data/gitea.db

# Note: sqlite3 exits 0 even when integrity_check reports corruption,
# so check the output rather than the exit code
INTEGRITY=$(sqlite3 gitea/data/gitea.db "PRAGMA integrity_check;")
if [ "$INTEGRITY" != "ok" ]; then
  echo "ERROR: SQLite integrity check failed: $INTEGRITY" | tee -a "$LOG_FILE"
  exit 1
fi

echo "✓ SQLite database intact" | tee -a "$LOG_FILE"

# 2. Restore and test Meilisearch dump
echo "Testing Meilisearch restore..." | tee -a "$LOG_FILE"
LATEST_MEILI=$(ls -t "$BACKUP_DIR"/meilisearch-*.dump | head -1)

# Start temporary Meilisearch instance
/tmp/meilisearch --db-path "$TEST_DIR/meili-test" --import-dump "$LATEST_MEILI" --http-addr localhost:7777 &
MEILI_PID=$!

# Wait for the dump import to finish (can take a while on large dumps)
for i in $(seq 1 30); do
  curl -s http://localhost:7777/health | grep -q '"available"' && break
  sleep 2
done

# Test search works (Meilisearch requires an explicit JSON Content-Type)
SEARCH_RESULT=$(curl -s -X POST http://localhost:7777/indexes/boat-manuals/search \
  -H 'Content-Type: application/json' \
  -d '{"q":"test"}')
if echo "$SEARCH_RESULT" | grep -q "hits"; then
  echo "✓ Meilisearch restore successful" | tee -a "$LOG_FILE"
else
  echo "ERROR: Meilisearch search failed" | tee -a "$LOG_FILE"
  kill $MEILI_PID
  exit 1
fi

kill $MEILI_PID

# 3. Verify file backups
echo "Testing file restore..." | tee -a "$LOG_FILE"
SAMPLE_FILES=$(find "$BACKUP_DIR/boat-manuals" -type f | head -10)
FILE_COUNT=$(echo "$SAMPLE_FILES" | wc -l)

if [ "$FILE_COUNT" -lt 1 ]; then
  echo "ERROR: No backup files found" | tee -a "$LOG_FILE"
  exit 1
fi

echo "✓ Found $FILE_COUNT sample files" | tee -a "$LOG_FILE"

# 4. Test rclone remote
echo "Testing off-box backup..." | tee -a "$LOG_FILE"
rclone ls b2:boatvault-backups/$(date +%Y-%m) | head -5 || {
  echo "ERROR: Off-box backup unreachable" | tee -a "$LOG_FILE"
  exit 1
}

echo "✓ Off-box backup accessible" | tee -a "$LOG_FILE"

# Cleanup
cd /
rm -rf "$TEST_DIR"

echo "[$(date)] ✅ All backup validation tests passed" | tee -a "$LOG_FILE"

# Send success notification (optional)
curl -X POST https://digital-lab.ca/api/notifications \
  -H "Content-Type: application/json" \
  -d '{"type":"backup-validation","status":"success"}' || true

Cron job for monthly validation

# crontab -e
0 3 1 * * /home/user/bin/validate-backups
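
The script assumes Meilisearch dump files already land in the backup directory. Dumps are created through the dumps API; a sketch of triggering one from the Node side (copying the resulting .dump file out of Meilisearch's dump directory into ~/backups is left to the backup job):

// server/scripts/create-dump.js
const { client } = require('../services/search');

async function createDump() {
  // Enqueues a dump task; the .dump file appears in Meilisearch's
  // configured dumps directory once the task completes
  const task = await client.createDump();
  console.log('Dump task enqueued:', task);
}

createDump();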

5. Systemd Health Checks

// server/routes/health.js
const express = require('express');
const router = express.Router();
const { client: meilisearch } = require('../services/search');
const db = require('../services/database');

router.get('/health', async (req, res) => {
  const checks = {
    app: 'ok',
    database: 'unknown',
    search: 'unknown',
    queue: 'unknown'
  };
  
  let healthy = true;
  
  // Check database
  try {
    db.prepare('SELECT 1').get();
    checks.database = 'ok';
  } catch (err) {
    checks.database = 'error';
    healthy = false;
  }
  
  // Check Meilisearch
  try {
    await meilisearch.health();
    checks.search = 'ok';
  } catch (err) {
    checks.search = 'error';
    healthy = false;
  }
  
  // Check queue (if using Redis) - reuse the shared queue rather than
  // opening a new Redis connection on every health check
  try {
    const { ocrQueue } = require('../queue');
    await ocrQueue.isPaused();
    checks.queue = 'ok';
  } catch (err) {
    checks.queue = 'error';
    healthy = false;
  }
  
  res.status(healthy ? 200 : 503).json({
    status: healthy ? 'healthy' : 'degraded',
    checks,
    timestamp: new Date().toISOString()
  });
});

module.exports = router;

Monitoring with systemd

# ~/.config/systemd/user/boat-docs-healthcheck.service
[Unit]
Description=Boat Docs Health Check

[Service]
Type=oneshot
ExecStart=/usr/bin/curl -f http://localhost:8080/health

# ~/.config/systemd/user/boat-docs-healthcheck.timer
[Unit]
Description=Run boat-docs health check every 5 minutes

[Timer]
OnBootSec=5min
OnUnitActiveSec=5min

[Install]
WantedBy=timers.target

6. Security Headers & Rate Limiting

// server/middleware/security.js
const helmet = require('helmet');
const rateLimit = require('express-rate-limit');

// Helmet configuration
const securityHeaders = helmet({
  contentSecurityPolicy: {
    directives: {
      defaultSrc: ["'self'"],
      scriptSrc: ["'self'"],
      styleSrc: ["'self'", "'unsafe-inline'"], // Tailwind might need this
      imgSrc: ["'self'", "data:", "https:"],
      connectSrc: ["'self'", "https://digital-lab.ca"],
      fontSrc: ["'self'"],
      objectSrc: ["'none'"],
      mediaSrc: ["'self'"],
      frameSrc: ["'none'"],
      frameAncestors: ["'none'"]
    }
  },
  hsts: {
    maxAge: 31536000,
    includeSubDomains: true,
    preload: true
  }
});

// Rate limiters
const apiLimiter = rateLimit({
  windowMs: 15 * 60 * 1000, // 15 minutes
  max: 100, // 100 requests per window
  message: 'Too many requests, please try again later'
});

const uploadLimiter = rateLimit({
  windowMs: 60 * 60 * 1000, // 1 hour
  max: 10, // 10 uploads per hour
  message: 'Upload limit exceeded'
});

const searchLimiter = rateLimit({
  windowMs: 1 * 60 * 1000, // 1 minute
  max: 30, // 30 searches per minute
  message: 'Search rate limit exceeded'
});

module.exports = {
  securityHeaders,
  apiLimiter,
  uploadLimiter,
  searchLimiter
};

Apply in Express

const { securityHeaders, apiLimiter, uploadLimiter, searchLimiter } = require('./middleware/security');

// Global security
app.use(securityHeaders);

// Per-route rate limiting
app.use('/api/', apiLimiter);
app.post('/api/upload', uploadLimiter, uploadHandler);
app.post('/api/search', searchLimiter, searchHandler);
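
One caveat when running behind the Nginx proxy above: Express sees every request as coming from localhost unless it is told to trust the proxy, so the limiters would throttle all visitors as a single client.

// Trust the first proxy hop (Nginx) so req.ip reflects the real client
// address and express-rate-limit keys its counters per visitor
app.set('trust proxy', 1);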

7. Gitea Upgrade Procedure

#!/bin/bash
# ~/bin/upgrade-gitea

set -e

GITEA_VERSION="1.24.0"
GITEA_BINARY="/tmp/gitea"
BACKUP_DIR=~/backups/gitea-pre-upgrade-$(date +%Y%m%d-%H%M%S)

echo "Upgrading Gitea to $GITEA_VERSION"

# 1. Stop Gitea
echo "Stopping Gitea..."
systemctl --user stop gitea.service || ssh stackcp "systemctl --user stop gitea.service"

# 2. Backup current version
echo "Creating backup..."
mkdir -p "$BACKUP_DIR"
cp -r ~/gitea "$BACKUP_DIR/"
cp "$GITEA_BINARY" "$BACKUP_DIR/gitea.old"

# 3. Download new version
echo "Downloading Gitea $GITEA_VERSION..."
curl -fsSL "https://dl.gitea.com/gitea/$GITEA_VERSION/gitea-$GITEA_VERSION-linux-amd64" -o "$GITEA_BINARY.new"
chmod 755 "$GITEA_BINARY.new"

# 4. Test new binary
echo "Testing new binary..."
"$GITEA_BINARY.new" --version

# 5. Replace binary
mv "$GITEA_BINARY" "$GITEA_BINARY.old"
mv "$GITEA_BINARY.new" "$GITEA_BINARY"

# 6. Start Gitea
echo "Starting Gitea..."
systemctl --user start gitea.service || ssh stackcp "systemctl --user start gitea.service"

# 7. Verify
sleep 5
if curl -f http://localhost:4000/ > /dev/null 2>&1; then
  echo "✅ Gitea upgrade successful to $GITEA_VERSION"
  "$GITEA_BINARY" --version
else
  echo "❌ Gitea failed to start, rolling back..."
  mv "$GITEA_BINARY.old" "$GITEA_BINARY"
  systemctl --user start gitea.service
  exit 1
fi

Summary: Production Hardening Checklist

  • Background queue for OCR (BullMQ or SQLite)
  • File safety pipeline (qpdf, ClamAV, validation)
  • Meilisearch tenant tokens (never expose master key)
  • Backup validation script (monthly restore tests)
  • Health check endpoints + monitoring
  • Security headers (helmet, CSP, HSTS)
  • Rate limiting (upload, search, API)
  • Gitea 1.24.0 upgrade
  • logrotate for application logs
  • systemd Restart=on-failure for all services

Deploy these before showing BoatVault to real users.