From 9c88146492f345e802d35cc4f4353ec4eb9a8bac Mon Sep 17 00:00:00 2001 From: ggq-admin Date: Sun, 19 Oct 2025 01:22:42 +0200 Subject: [PATCH] docs: Complete architecture, roadmap, and expert panel analysis Architecture: - database-schema.sql: Future-proof SQLite schema with Postgres migration path - meilisearch-config.json: Search index config with boat terminology synonyms - hardened-production-guide.md: Security hardening (queues, file safety, tenant tokens) Roadmap: - v1.0-mvp.md: Feature roadmap and success criteria - 2-week-launch-plan.md: Day-by-day execution plan with deliverables Debates: - 01-schema-and-vertical-analysis.md: Expert panel consensus on architecture Key Decisions: - Hybrid SQLite + Meilisearch architecture - Search-first design (Meilisearch as query layer) - Multi-vertical support (boats, marinas, properties) - Offline-first PWA approach - Tenant token security (never expose master key) - Background queue for OCR processing - File safety pipeline (qpdf + ClamAV) --- docs/architecture/database-schema.sql | 292 +++++++ .../architecture/hardened-production-guide.md | 741 ++++++++++++++++++ docs/architecture/meilisearch-config.json | 276 +++++++ docs/roadmap/2-week-launch-plan.md | 337 ++++++++ docs/roadmap/v1.0-mvp.md | 153 ++++ 5 files changed, 1799 insertions(+) create mode 100644 docs/architecture/database-schema.sql create mode 100644 docs/architecture/hardened-production-guide.md create mode 100644 docs/architecture/meilisearch-config.json create mode 100644 docs/roadmap/2-week-launch-plan.md create mode 100644 docs/roadmap/v1.0-mvp.md diff --git a/docs/architecture/database-schema.sql b/docs/architecture/database-schema.sql new file mode 100644 index 0000000..6188665 --- /dev/null +++ b/docs/architecture/database-schema.sql @@ -0,0 +1,292 @@ +-- NaviDocs Database Schema v1.0 +-- SQLite3 (designed for future PostgreSQL migration) +-- Author: Expert Panel Consensus +-- Date: 2025-01-19 + +-- 
============================================================================ +-- CORE ENTITIES +-- ============================================================================ + +-- Users table +CREATE TABLE users ( + id TEXT PRIMARY KEY, -- UUID + email TEXT UNIQUE NOT NULL, + name TEXT, + password_hash TEXT NOT NULL, -- bcrypt hash + created_at INTEGER NOT NULL, -- Unix timestamp + updated_at INTEGER NOT NULL, + last_login_at INTEGER +); + +-- Organizations (for multi-entity support) +CREATE TABLE organizations ( + id TEXT PRIMARY KEY, + name TEXT NOT NULL, + type TEXT DEFAULT 'personal', -- personal, commercial, hoa + created_at INTEGER NOT NULL, + updated_at INTEGER NOT NULL +); + +-- User-Organization membership +CREATE TABLE user_organizations ( + user_id TEXT NOT NULL, + organization_id TEXT NOT NULL, + role TEXT DEFAULT 'member', -- admin, manager, member, viewer + joined_at INTEGER NOT NULL, + PRIMARY KEY (user_id, organization_id), + FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE, + FOREIGN KEY (organization_id) REFERENCES organizations(id) ON DELETE CASCADE +); + +-- ============================================================================ +-- BOAT/ENTITY MANAGEMENT +-- ============================================================================ + +-- Boats/Entities (multi-vertical support) +CREATE TABLE entities ( + id TEXT PRIMARY KEY, + organization_id TEXT NOT NULL, + user_id TEXT NOT NULL, -- Primary owner + entity_type TEXT NOT NULL, -- boat, marina, condo, etc + name TEXT NOT NULL, + + -- Boat-specific fields (nullable for other entity types) + make TEXT, + model TEXT, + year INTEGER, + hull_id TEXT, -- Hull Identification Number + vessel_type TEXT, -- powerboat, sailboat, catamaran, trawler + length_feet INTEGER, + + -- Property-specific fields (nullable for boats) + property_type TEXT, -- marina, waterfront-condo, yacht-club + address TEXT, + gps_lat REAL, + gps_lon REAL, + + -- Extensible metadata (JSON) + metadata TEXT, + + 
created_at INTEGER NOT NULL, + updated_at INTEGER NOT NULL, + + FOREIGN KEY (organization_id) REFERENCES organizations(id) ON DELETE CASCADE, + FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE +); + +-- Sub-entities (systems, docks, units, facilities) +CREATE TABLE sub_entities ( + id TEXT PRIMARY KEY, + entity_id TEXT NOT NULL, + name TEXT NOT NULL, + type TEXT, -- system, dock, unit, facility + metadata TEXT, -- JSON + created_at INTEGER NOT NULL, + updated_at INTEGER NOT NULL, + FOREIGN KEY (entity_id) REFERENCES entities(id) ON DELETE CASCADE +); + +-- Components (engines, panels, appliances) +CREATE TABLE components ( + id TEXT PRIMARY KEY, + sub_entity_id TEXT, + entity_id TEXT, -- Direct link for non-hierarchical components + name TEXT NOT NULL, + manufacturer TEXT, + model_number TEXT, + serial_number TEXT, + install_date INTEGER, + warranty_expires INTEGER, + metadata TEXT, -- JSON + created_at INTEGER NOT NULL, + updated_at INTEGER NOT NULL, + FOREIGN KEY (sub_entity_id) REFERENCES sub_entities(id) ON DELETE SET NULL, + FOREIGN KEY (entity_id) REFERENCES entities(id) ON DELETE CASCADE +); + +-- ============================================================================ +-- DOCUMENT MANAGEMENT +-- ============================================================================ + +-- Documents +CREATE TABLE documents ( + id TEXT PRIMARY KEY, + organization_id TEXT NOT NULL, + entity_id TEXT, -- Boat, marina, condo + sub_entity_id TEXT, -- System, dock, unit + component_id TEXT, -- Engine, panel, appliance + uploaded_by TEXT, -- Nullable on purpose: its FK is ON DELETE SET NULL, so documents outlive a deleted uploader + + title TEXT NOT NULL, + document_type TEXT NOT NULL, -- owner-manual, component-manual, service-record, etc + file_path TEXT NOT NULL, + file_name TEXT NOT NULL, + file_size INTEGER NOT NULL, + file_hash TEXT NOT NULL, -- SHA256 for deduplication + mime_type TEXT DEFAULT 'application/pdf', + + page_count INTEGER, + language TEXT DEFAULT 'en', + + status TEXT DEFAULT 'processing', -- processing, indexed, failed, 
archived, deleted + replaced_by TEXT, -- Document ID that supersedes this one + + -- Shared component library support + is_shared BOOLEAN DEFAULT 0, + shared_component_id TEXT, -- Reference to shared manual + + -- Metadata (JSON) + metadata TEXT, + + created_at INTEGER NOT NULL, + updated_at INTEGER NOT NULL, + + FOREIGN KEY (organization_id) REFERENCES organizations(id) ON DELETE CASCADE, + FOREIGN KEY (entity_id) REFERENCES entities(id) ON DELETE SET NULL, + FOREIGN KEY (sub_entity_id) REFERENCES sub_entities(id) ON DELETE SET NULL, + FOREIGN KEY (component_id) REFERENCES components(id) ON DELETE SET NULL, + FOREIGN KEY (uploaded_by) REFERENCES users(id) ON DELETE SET NULL +); + +-- Document pages (OCR results) +CREATE TABLE document_pages ( + id TEXT PRIMARY KEY, + document_id TEXT NOT NULL, + page_number INTEGER NOT NULL, + + -- OCR data + ocr_text TEXT, + ocr_confidence REAL, + ocr_language TEXT DEFAULT 'en', + ocr_completed_at INTEGER, + + -- Search indexing + search_indexed_at INTEGER, + meilisearch_id TEXT, -- ID in Meilisearch index + + -- Metadata (JSON: bounding boxes, etc) + metadata TEXT, + + created_at INTEGER NOT NULL, + + UNIQUE(document_id, page_number), + FOREIGN KEY (document_id) REFERENCES documents(id) ON DELETE CASCADE +); + +-- ============================================================================ +-- BACKGROUND JOB QUEUE +-- ============================================================================ + +-- OCR Jobs (queue) +CREATE TABLE ocr_jobs ( + id TEXT PRIMARY KEY, + document_id TEXT NOT NULL, + + status TEXT DEFAULT 'pending', -- pending, processing, completed, failed + progress INTEGER DEFAULT 0, -- 0-100 + + error TEXT, + started_at INTEGER, + completed_at INTEGER, + created_at INTEGER NOT NULL, + + FOREIGN KEY (document_id) REFERENCES documents(id) ON DELETE CASCADE +); + +-- ============================================================================ +-- PERMISSIONS & SHARING +-- 
============================================================================ + +-- Document permissions (granular access control) +CREATE TABLE permissions ( + id TEXT PRIMARY KEY, + resource_type TEXT NOT NULL, -- document, entity, organization + resource_id TEXT NOT NULL, + user_id TEXT NOT NULL, + permission TEXT NOT NULL, -- read, write, share, delete, admin + granted_by TEXT, -- Nullable on purpose: its FK is ON DELETE SET NULL, so a grant outlives a deleted granter + granted_at INTEGER NOT NULL, + expires_at INTEGER, + + FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE, + FOREIGN KEY (granted_by) REFERENCES users(id) ON DELETE SET NULL +); + +-- Document shares (simplified sharing) +CREATE TABLE document_shares ( + id TEXT PRIMARY KEY, + document_id TEXT NOT NULL, + shared_by TEXT NOT NULL, + shared_with TEXT NOT NULL, + permission TEXT DEFAULT 'read', -- read, write + created_at INTEGER NOT NULL, + + UNIQUE(document_id, shared_with), + FOREIGN KEY (document_id) REFERENCES documents(id) ON DELETE CASCADE, + FOREIGN KEY (shared_by) REFERENCES users(id) ON DELETE CASCADE, + FOREIGN KEY (shared_with) REFERENCES users(id) ON DELETE CASCADE +); + +-- ============================================================================ +-- BOOKMARKS & USER PREFERENCES +-- ============================================================================ + +-- Bookmarks (quick access to important pages) +CREATE TABLE bookmarks ( + id TEXT PRIMARY KEY, + user_id TEXT NOT NULL, + document_id TEXT NOT NULL, + page_id TEXT, -- Optional: specific page + label TEXT NOT NULL, + quick_access BOOLEAN DEFAULT 0, -- Pin to homepage + created_at INTEGER NOT NULL, + + FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE, + FOREIGN KEY (document_id) REFERENCES documents(id) ON DELETE CASCADE, + FOREIGN KEY (page_id) REFERENCES document_pages(id) ON DELETE CASCADE +); + +-- ============================================================================ +-- INDEXES FOR PERFORMANCE +-- ============================================================================ 
+ +CREATE INDEX idx_entities_org ON entities(organization_id); +CREATE INDEX idx_entities_user ON entities(user_id); +CREATE INDEX idx_entities_type ON entities(entity_type); + +CREATE INDEX idx_documents_org ON documents(organization_id); +CREATE INDEX idx_documents_entity ON documents(entity_id); +CREATE INDEX idx_documents_status ON documents(status); +CREATE INDEX idx_documents_hash ON documents(file_hash); +CREATE INDEX idx_documents_shared ON documents(is_shared, shared_component_id); + +CREATE INDEX idx_pages_document ON document_pages(document_id); +CREATE INDEX idx_pages_indexed ON document_pages(search_indexed_at); + +CREATE INDEX idx_jobs_status ON ocr_jobs(status); +CREATE INDEX idx_jobs_document ON ocr_jobs(document_id); + +CREATE INDEX idx_permissions_user ON permissions(user_id); +CREATE INDEX idx_permissions_resource ON permissions(resource_type, resource_id); + +CREATE INDEX idx_bookmarks_user ON bookmarks(user_id); + +-- ============================================================================ +-- INITIAL DATA +-- ============================================================================ + +-- Create default personal organization for each user (handled in application) +-- Seed data will be added via migrations + +-- ============================================================================ +-- MIGRATION NOTES +-- ============================================================================ + +-- To migrate to PostgreSQL in the future: +-- 1. Replace TEXT PRIMARY KEY with UUID type +-- 2. Replace INTEGER timestamps with TIMESTAMP +-- 3. Replace TEXT metadata columns with JSONB +-- 4. Add proper CHECK constraints +-- 5. Consider partitioning for large tables (document_pages) +-- 6. 
Add pgvector extension for embedding support + diff --git a/docs/architecture/hardened-production-guide.md b/docs/architecture/hardened-production-guide.md new file mode 100644 index 0000000..082b4af --- /dev/null +++ b/docs/architecture/hardened-production-guide.md @@ -0,0 +1,741 @@ +# Hardened Tech Stack - Production-Ready Improvements + +## 🚨 Critical Fixes Applied + +Based on expert panel review, these are the **must-fix** items before launch. + +--- + +## 1. Background Processing Architecture + +### **Problem:** +OCR/PDF processing will spike CPU/RAM on shared hosting and murder request latency. + +### **Solution: Job Queue System** + +**Option A: BullMQ + Redis (Recommended)** +```javascript +// server/queue/index.js +const Queue = require('bullmq').Queue; +const Worker = require('bullmq').Worker; +const Redis = require('ioredis'); + +const connection = new Redis({ + host: 'localhost', + port: 6379, + maxRetriesPerRequest: null +}); + +// Create queue +const ocrQueue = new Queue('ocr-processing', { connection }); + +// Add job (from upload endpoint) +async function queueOCR(fileData) { + const job = await ocrQueue.add('process-pdf', { + filePath: fileData.path, + docId: fileData.id, + boatId: fileData.boatId + }, { + attempts: 3, + backoff: { + type: 'exponential', + delay: 2000 + } + }); + return job.id; +} + +// Worker (separate process) +const worker = new Worker('ocr-processing', async job => { + const { filePath, docId, boatId } = job.data; + + // Update job progress + await job.updateProgress(10); + + // Extract text with OCR + const text = await extractTextWithOCR(filePath); + await job.updateProgress(50); + + // Index in Meilisearch + await indexDocument({ docId, boatId, text }); + await job.updateProgress(100); + + return { docId, pages: text.length }; +}, { connection }); + +worker.on('completed', job => { + console.log(`Job ${job.id} completed`); +}); + +worker.on('failed', (job, err) => { + console.error(`Job ${job.id} failed:`, err); +}); + 
+module.exports = { queueOCR, ocrQueue }; +``` + +**Option B: SQLite Queue (No Redis dependency)** +```javascript +// server/queue/sqlite-queue.js +const Database = require('better-sqlite3'); +const db = new Database('./data/queue.db'); + +db.exec(` + CREATE TABLE IF NOT EXISTS jobs ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + type TEXT NOT NULL, + payload TEXT NOT NULL, + status TEXT DEFAULT 'pending', + attempts INTEGER DEFAULT 0, + max_attempts INTEGER DEFAULT 3, + error TEXT, + created_at INTEGER DEFAULT (unixepoch()), + updated_at INTEGER DEFAULT (unixepoch()) + ) +`); + +class SQLiteQueue { + enqueue(type, payload) { + const stmt = db.prepare(` + INSERT INTO jobs (type, payload) VALUES (?, ?) + `); + const result = stmt.run(type, JSON.stringify(payload)); + return result.lastInsertRowid; + } + + dequeue() { + // Claim the oldest pending job in ONE statement so two workers can never pick the same row + const job = db.prepare(` + UPDATE jobs SET status = 'processing', attempts = attempts + 1, updated_at = unixepoch() + WHERE id = ( + SELECT id FROM jobs + WHERE status = 'pending' AND attempts < max_attempts + ORDER BY created_at ASC, id ASC LIMIT 1 + ) + RETURNING * + `).get(); + + if (!job) return null; + + return { + ...job, + payload: JSON.parse(job.payload) + }; + } + + complete(jobId) { + db.prepare(`UPDATE jobs SET status = 'completed', updated_at = unixepoch() WHERE id = ?`).run(jobId); + } + + fail(jobId, error) { // Re-queue while attempts remain; only mark 'failed' once max_attempts is used up + db.prepare(` + UPDATE jobs SET status = CASE WHEN attempts < max_attempts THEN 'pending' ELSE 'failed' END, error = ?, updated_at = unixepoch() WHERE id = ? + `).run(String(error?.message ?? error), jobId); + } +} + +module.exports = new SQLiteQueue(); +``` + +**Worker Process (systemd service)** +```ini +# ~/.config/systemd/user/ocr-worker.service +[Unit] +Description=OCR Worker for Boat Docs + +[Service] +WorkingDirectory=%h/apps/boat-docs +ExecStart=/usr/bin/node server/workers/ocr-worker.js +Environment=NODE_ENV=production +Restart=always +RestartSec=10 + +[Install] +WantedBy=default.target +``` + +--- + +## 2. File Safety Pipeline + +### **Problem:** +Malicious PDFs, zip bombs, broken encodings will wreck your day. 
+ +### **Solution: Multi-Layer Validation** + +```javascript +// server/middleware/file-safety.js +const { execSync } = require('child_process'); +const fs = require('fs'); +const path = require('path'); + +const FILE_LIMITS = { + maxSize: 128 * 1024 * 1024, // 128MB + maxPages: 1000, + allowedMimeTypes: ['application/pdf'], + allowedExtensions: ['.pdf'] +}; + +async function validateUpload(file) { + const errors = []; + + // 1. Extension check + const ext = path.extname(file.originalname).toLowerCase(); + if (!FILE_LIMITS.allowedExtensions.includes(ext)) { + errors.push(`Invalid extension: ${ext}`); + } + + // 2. MIME type check + if (!FILE_LIMITS.allowedMimeTypes.includes(file.mimetype)) { + errors.push(`Invalid MIME type: ${file.mimetype}`); + } + + // 3. File size + if (file.size > FILE_LIMITS.maxSize) { + errors.push(`File too large: ${(file.size / 1024 / 1024).toFixed(2)}MB`); + } + + // 4. Magic byte check + const buffer = fs.readFileSync(file.path); + if (!buffer.toString('utf8', 0, 4).includes('%PDF')) { + errors.push('Not a valid PDF (magic bytes)'); + } + + if (errors.length > 0) { + throw new Error(errors.join('; ')); + } + + return true; +} + +async function sanitizePDF(inputPath, outputPath) { + try { + // Use qpdf to linearize and sanitize + execSync(`qpdf --linearize --newline-before-endstream "${inputPath}" "${outputPath}"`, { + timeout: 30000 // 30 second timeout + }); + + // Check page count + const info = execSync(`qpdf --show-npages "${outputPath}"`).toString().trim(); + const pageCount = parseInt(info); + + if (pageCount > FILE_LIMITS.maxPages) { + throw new Error(`Too many pages: ${pageCount}`); + } + + return { sanitized: true, pages: pageCount }; + } catch (err) { + throw new Error(`PDF sanitization failed: ${err.message}`); + } +} + +async function scanForMalware(filePath) { + try { + // ClamAV scan + execSync(`clamscan --no-summary "${filePath}"`, { + timeout: 60000 // 1 minute timeout + }); + return { clean: true }; + } catch (err) { + 
if (err.status === 1) throw new Error('Malware detected'); + // Fail closed: only exit 127 (shell could not find clamscan) may skip the scan; timeouts and scanner errors reject the upload + if (err.status !== 127) throw new Error(`Malware scan failed: ${err.message}`); + // ClamAV not installed - log warning but don't fail + console.warn('ClamAV not available, skipping virus scan'); + return { clean: true, skipped: true }; + } +} + +async function safetyPipeline(file) { + // Step 1: Basic validation + await validateUpload(file); + + // Step 2: Sanitize with qpdf + const sanitizedPath = `${file.path}.sanitized.pdf`; + const { pages } = await sanitizePDF(file.path, sanitizedPath); + + // Step 3: Malware scan + await scanForMalware(sanitizedPath); + + // Step 4: Replace original with sanitized version + fs.unlinkSync(file.path); + fs.renameSync(sanitizedPath, file.path); + + return { safe: true, pages }; +} + +module.exports = { safetyPipeline, validateUpload }; +``` + +**Express route with safety** +```javascript +const multer = require('multer'); +const { safetyPipeline } = require('./middleware/file-safety'); +const { queueOCR, ocrQueue } = require('./queue'); + +const upload = multer({ dest: './uploads/temp/' }); + +app.post('/api/upload', upload.single('manual'), async (req, res) => { + try { + // Safety pipeline + const { pages } = await safetyPipeline(req.file); + + // Move to permanent storage + const docId = generateId(); + const finalPath = `./data/boat-manuals/${docId}.pdf`; + fs.renameSync(req.file.path, finalPath); + + // Queue for OCR processing + const jobId = await queueOCR({ + filePath: finalPath, + docId, + boatId: req.body.boatId, + pages + }); + + res.json({ + docId, + jobId, + status: 'processing', + pages + }); + } catch (err) { + // Clean up on failure + if (req.file?.path && fs.existsSync(req.file.path)) { + fs.unlinkSync(req.file.path); + } + res.status(400).json({ error: err.message }); + } +}); + +// Job status endpoint +app.get('/api/jobs/:jobId', async (req, res) => { + const job = await ocrQueue.getJob(req.params.jobId); + if (!job) return res.status(404).json({ error: 'Job not found' }); // getJob yields nothing for an unknown job id + res.json({ + id: job.id, progress: job.progress, + state: await job.getState(), + result: job.returnvalue + }); +}); +``` + 
+--- + +## 3. Meilisearch Security + +### **Problem:** +Port 7700 exposed = public data. Master key in client code = disaster. + +### **Solution: Tenant Tokens** + +```javascript +// server/services/search.js +const { MeiliSearch } = require('meilisearch'); + +const client = new MeiliSearch({ + host: 'http://localhost:7700', + apiKey: process.env.MEILISEARCH_MASTER_KEY // NEVER send to client! +}); + +// Generate tenant token (short-lived, scoped) +function generateTenantToken(userId, boatIds) { + const searchRules = { + 'boat-manuals': { + filter: `boatId IN [${boatIds.map(id => `"${id}"`).join(', ')}]` + } + }; + + const token = client.generateTenantToken(searchRules, { + apiKey: process.env.MEILISEARCH_SEARCH_KEY, // Sign with a search-only API key, not the master key (recent SDKs also take that key's uid - TODO confirm for the installed version) + expiresAt: new Date(Date.now() + 3600 * 1000) // 1 hour + }); + + return token; +} + +// API endpoint to get search token +app.get('/api/search/token', requireAuth, async (req, res) => { + const userBoats = await getUserBoats(req.user.id); + const token = await generateTenantToken(req.user.id, userBoats); // await: harmless for a string, required if the SDK returns a Promise + + res.json({ + token, + host: 'https://digital-lab.ca/search', // Through reverse proxy (nginx location /search/) + expiresIn: 3600 + }); +}); + +module.exports = { client, generateTenantToken }; +``` + +**Frontend usage (safe)** +```javascript +// client/src/services/search.js +let searchClient = null; +let tokenExpiresAt = 0; // ms epoch after which the cached tenant token must be refreshed +async function getSearchClient() { + if (!searchClient || Date.now() >= tokenExpiresAt) { + // Fetch tenant token from backend + const { token, host, expiresIn } = await fetch('/api/search/token').then(r => r.json()); + tokenExpiresAt = Date.now() + (expiresIn - 60) * 1000; // renew one minute before the token expires + searchClient = new MeiliSearch({ + host, + apiKey: token // Scoped, time-limited token + }); + } + return searchClient; +} + +async function searchManuals(query) { + const client = await getSearchClient(); + const index = client.index('boat-manuals'); + + const results = await index.search(query, { + filter: 'system = "electrical"', // Additional client-side filter + attributesToHighlight: ['text', 'title'] + }); + + return results; +} +``` + +**Nginx reverse proxy (for Meilisearch)** +```nginx +# 
/etc/nginx/sites-available/digital-lab.ca +location /search/ { + proxy_pass http://localhost:7700/; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + + # Only allow POST (search), block admin endpoints + limit_except POST { + deny all; + } +} +``` + +--- + +## 4. Backup Validation Script + +### **Problem:** +Everyone has backups. Few have restores. + +### **Solution: Automated Restore Testing** + +```bash +#!/bin/bash +# ~/bin/validate-backups + +set -e + +BACKUP_DIR=~/backups +TEST_DIR=/tmp/restore-test-$(date +%s) +LOG_FILE=~/logs/backup-validation.log + +echo "[$(date)] Starting backup validation" | tee -a "$LOG_FILE" + +# Create test directory +mkdir -p "$TEST_DIR" +cd "$TEST_DIR" + +# 1. Restore SQLite databases +echo "Testing SQLite restore..." | tee -a "$LOG_FILE" +LATEST_DB=$(ls -t "$BACKUP_DIR"/gitea-backup-*.tar.gz | head -1) + +tar -xzf "$LATEST_DB" gitea/data/gitea.db +sqlite3 gitea/data/gitea.db "PRAGMA integrity_check;" || { + echo "ERROR: SQLite integrity check failed" | tee -a "$LOG_FILE" + exit 1 +} + +echo "✓ SQLite database intact" | tee -a "$LOG_FILE" + +# 2. Restore and test Meilisearch dump +echo "Testing Meilisearch restore..." | tee -a "$LOG_FILE" +LATEST_MEILI=$(ls -t "$BACKUP_DIR"/meilisearch-*.dump | head -1) + +# Start temporary Meilisearch instance +/tmp/meilisearch --db-path "$TEST_DIR/meili-test" --import-dump "$LATEST_MEILI" --http-addr localhost:7777 & +MEILI_PID=$! +sleep 5 + +# Test search works +SEARCH_RESULT=$(curl -s http://localhost:7777/indexes/boat-manuals/search -d '{"q":"test"}') +if echo "$SEARCH_RESULT" | grep -q "hits"; then + echo "✓ Meilisearch restore successful" | tee -a "$LOG_FILE" +else + echo "ERROR: Meilisearch search failed" | tee -a "$LOG_FILE" + kill $MEILI_PID + exit 1 +fi + +kill $MEILI_PID + +# 3. Verify file backups +echo "Testing file restore..." 
| tee -a "$LOG_FILE" +SAMPLE_FILES=$(find "$BACKUP_DIR/boat-manuals" -type f | head -10) +FILE_COUNT=$(printf '%s' "$SAMPLE_FILES" | grep -c . || true) # counts non-empty lines, so an empty list is 0 (echo | wc -l always reported 1) + +if [ "$FILE_COUNT" -lt 1 ]; then + echo "ERROR: No backup files found" | tee -a "$LOG_FILE" + exit 1 +fi + +echo "✓ Found $FILE_COUNT sample files" | tee -a "$LOG_FILE" + +# 4. Test rclone remote +echo "Testing off-box backup..." | tee -a "$LOG_FILE" +rclone ls "b2:boatvault-backups/$(date +%Y-%m)" > "$TEST_DIR/remote-listing.txt" && head -5 "$TEST_DIR/remote-listing.txt" || { # no pipe: a pipe reports head's exit status and hides rclone failures + echo "ERROR: Off-box backup unreachable" | tee -a "$LOG_FILE" + exit 1 +} + +echo "✓ Off-box backup accessible" | tee -a "$LOG_FILE" + +# Cleanup +cd / +rm -rf "$TEST_DIR" + +echo "[$(date)] ✅ All backup validation tests passed" | tee -a "$LOG_FILE" + +# Send success notification (optional) +curl -X POST https://digital-lab.ca/api/notifications \ + -H "Content-Type: application/json" \ + -d '{"type":"backup-validation","status":"success"}' || true +``` + +**Cron job for monthly validation** +```bash +# crontab -e +0 3 1 * * /home/user/bin/validate-backups +``` + +--- + +## 5. 
Systemd Health Checks + +```javascript +// server/routes/health.js +const express = require('express'); +const router = express.Router(); +const { client: meilisearch } = require('../services/search'); +const db = require('../services/database'); + +router.get('/health', async (req, res) => { + const checks = { + app: 'ok', + database: 'unknown', + search: 'unknown', + queue: 'unknown' + }; + + let healthy = true; + + // Check database + try { + db.prepare('SELECT 1').get(); + checks.database = 'ok'; + } catch (err) { + checks.database = 'error'; + healthy = false; + } + + // Check Meilisearch + try { + await meilisearch.health(); + checks.search = 'ok'; + } catch (err) { + checks.search = 'error'; + healthy = false; + } + + // Check queue (if using Redis) + try { + // Reuse the shared queue: constructing a new Queue per request opens a Redis connection that is never closed + const { ocrQueue } = require('../queue'); + await ocrQueue.isPaused(); + checks.queue = 'ok'; + } catch (err) { + checks.queue = 'error'; + healthy = false; + } + + res.status(healthy ? 200 : 503).json({ + status: healthy ? 'healthy' : 'degraded', + checks, + timestamp: new Date().toISOString() + }); +}); + +module.exports = router; +``` + +**Monitoring with systemd** +```ini +# ~/.config/systemd/user/boat-docs-healthcheck.service +[Unit] +Description=Boat Docs Health Check + +[Service] +Type=oneshot +ExecStart=/usr/bin/curl -f http://localhost:8080/health + +# ~/.config/systemd/user/boat-docs-healthcheck.timer +[Unit] +Description=Run boat-docs health check every 5 minutes + +[Timer] +OnBootSec=5min +OnUnitActiveSec=5min + +[Install] +WantedBy=timers.target +``` + +--- + +## 6. 
Security Headers & Rate Limiting + +```javascript +// server/middleware/security.js +const helmet = require('helmet'); +const rateLimit = require('express-rate-limit'); + +// Helmet configuration +const securityHeaders = helmet({ + contentSecurityPolicy: { + directives: { + defaultSrc: ["'self'"], + scriptSrc: ["'self'"], + styleSrc: ["'self'", "'unsafe-inline'"], // Tailwind might need this + imgSrc: ["'self'", "data:", "https:"], + connectSrc: ["'self'", "https://digital-lab.ca"], + fontSrc: ["'self'"], + objectSrc: ["'none'"], + mediaSrc: ["'self'"], + frameSrc: ["'none'"], + frameAncestors: ["'none'"] + } + }, + hsts: { + maxAge: 31536000, + includeSubDomains: true, + preload: true + } +}); + +// Rate limiters +const apiLimiter = rateLimit({ + windowMs: 15 * 60 * 1000, // 15 minutes + max: 100, // 100 requests per window + message: 'Too many requests, please try again later' +}); + +const uploadLimiter = rateLimit({ + windowMs: 60 * 60 * 1000, // 1 hour + max: 10, // 10 uploads per hour + message: 'Upload limit exceeded' +}); + +const searchLimiter = rateLimit({ + windowMs: 1 * 60 * 1000, // 1 minute + max: 30, // 30 searches per minute + message: 'Search rate limit exceeded' +}); + +module.exports = { + securityHeaders, + apiLimiter, + uploadLimiter, + searchLimiter +}; +``` + +**Apply in Express** +```javascript +const { securityHeaders, apiLimiter, uploadLimiter, searchLimiter } = require('./middleware/security'); + +// Global security +app.use(securityHeaders); + +// Per-route rate limiting +app.use('/api/', apiLimiter); +app.post('/api/upload', uploadLimiter, uploadHandler); +app.post('/api/search', searchLimiter, searchHandler); +``` + +--- + +## 7. Gitea Upgrade Procedure + +```bash +#!/bin/bash +# ~/bin/upgrade-gitea + +set -e + +GITEA_VERSION="1.24.0" +GITEA_BINARY="/tmp/gitea" +BACKUP_DIR=~/backups/gitea-pre-upgrade-$(date +%Y%m%d-%H%M%S) + +echo "Upgrading Gitea to $GITEA_VERSION" + +# 1. Stop Gitea +echo "Stopping Gitea..." 
+systemctl --user stop gitea.service || ssh stackcp "systemctl --user stop gitea.service" + +# 2. Backup current version +echo "Creating backup..." +mkdir -p "$BACKUP_DIR" +cp -r ~/gitea "$BACKUP_DIR/" +cp "$GITEA_BINARY" "$BACKUP_DIR/gitea.old" + +# 3. Download new version +echo "Downloading Gitea $GITEA_VERSION..." +curl -fsSL "https://dl.gitea.com/gitea/$GITEA_VERSION/gitea-$GITEA_VERSION-linux-amd64" -o "$GITEA_BINARY.new" +chmod 755 "$GITEA_BINARY.new" + +# 4. Test new binary +echo "Testing new binary..." +"$GITEA_BINARY.new" --version + +# 5. Replace binary +mv "$GITEA_BINARY" "$GITEA_BINARY.old" +mv "$GITEA_BINARY.new" "$GITEA_BINARY" + +# 6. Start Gitea +echo "Starting Gitea..." +systemctl --user start gitea.service || ssh stackcp "systemctl --user start gitea.service" + +# 7. Verify +sleep 5 +if curl -f http://localhost:4000/ > /dev/null 2>&1; then + echo "✅ Gitea upgrade successful to $GITEA_VERSION" + "$GITEA_BINARY" --version +else + echo "❌ Gitea failed to start, rolling back..." 
+ mv "$GITEA_BINARY.old" "$GITEA_BINARY" + systemctl --user start gitea.service + exit 1 +fi +``` + +--- + +## Summary: Production Hardening Checklist + +- [ ] Background queue for OCR (BullMQ or SQLite) +- [ ] File safety pipeline (qpdf, ClamAV, validation) +- [ ] Meilisearch tenant tokens (never expose master key) +- [ ] Backup validation script (monthly restore tests) +- [ ] Health check endpoints + monitoring +- [ ] Security headers (helmet, CSP, HSTS) +- [ ] Rate limiting (upload, search, API) +- [ ] Gitea 1.24.0 upgrade +- [ ] logrotate for application logs +- [ ] systemd Restart=on-failure for all services + +**Deploy these before showing BoatVault to real users.** + diff --git a/docs/architecture/meilisearch-config.json b/docs/architecture/meilisearch-config.json new file mode 100644 index 0000000..dff982a --- /dev/null +++ b/docs/architecture/meilisearch-config.json @@ -0,0 +1,276 @@ +{ + "indexName": "navidocs-pages", + "description": "NaviDocs search index for boat manual pages (multi-vertical support)", + "version": "1.0.0", + + "settings": { + "searchableAttributes": [ + "title", + "text", + "systems", + "categories", + "tags", + "entityName", + "componentName", + "manufacturer", + "modelNumber", + "boatName" + ], + + "filterableAttributes": [ + "vertical", + "organizationId", + "entityId", + "entityType", + "userId", + "docId", + "documentType", + "systems", + "categories", + "boatMake", + "boatModel", + "boatYear", + "vesselType", + "status", + "priority", + "language", + "complianceType", + "propertyType", + "facilityType" + ], + + "sortableAttributes": [ + "createdAt", + "updatedAt", + "pageNumber", + "year", + "ocrConfidence", + "inspectionDate", + "nextDue" + ], + + "displayedAttributes": [ + "*" + ], + + "synonyms": { + "bilge": ["sump", "drain", "bilge pump"], + "head": ["toilet", "marine toilet", "WC", "lavatory"], + "galley": ["kitchen"], + "helm": ["steering", "wheel", "cockpit controls"], + "bow": ["front", "forward"], + "stern": ["aft", 
"back", "rear"], + "port": ["left"], + "starboard": ["right"], + "VHF": ["radio", "marine radio"], + "GPS": ["chartplotter", "navigation system", "plotter"], + "autopilot": ["auto helm", "auto pilot"], + "windlass": ["anchor winch", "anchor windlass"], + "thruster": ["bow thruster", "stern thruster"], + "generator": ["gen", "genset"], + "inverter": ["power inverter"], + "shore power": ["dock power", "land power"], + "seacock": ["through-hull", "thru-hull"], + "battery": ["batteries", "house bank"], + "water tank": ["fresh water tank", "water storage"], + "holding tank": ["waste tank", "black water tank"], + "grey water": ["gray water", "shower drain"], + "HVAC": ["air conditioning", "heating", "climate control"], + "engine": ["motor", "powerplant"], + "transmission": ["gearbox", "drive"], + "impeller": ["water pump impeller"], + "alternator": ["charging system"], + "starter": ["starting motor"], + "fuel filter": ["fuel separator", "racor"], + "water heater": ["hot water heater", "calorifier"], + "refrigerator": ["fridge", "ice box"], + "freezer": ["deep freeze"], + "microwave": ["microwave oven"], + "stove": ["cooktop", "range"], + "oven": ["baking oven"], + "anchor": ["ground tackle"], + "chain": ["anchor chain", "rode"], + "rope": ["line", "dockline"], + "fender": ["bumper"], + "davit": ["crane", "lifting davit"] + }, + + "stopWords": [ + "the", "a", "an", "and", "or", "but", "in", "on", "at", "to", "for", + "of", "with", "by", "from", "as", "is", "was", "are", "be", "been", + "being", "have", "has", "had", "do", "does", "did", "will", "would", + "could", "should", "may", "might", "must", "can", "this", "that", + "these", "those", "it", "its", "it's" + ], + + "rankingRules": [ + "words", + "typo", + "proximity", + "attribute", + "sort", + "exactness" + ], + + "typoTolerance": { + "enabled": true, + "minWordSizeForTypos": { + "oneTypo": 4, + "twoTypos": 8 + }, + "disableOnWords": [], + "disableOnAttributes": [] + }, + + "faceting": { + "maxValuesPerFacet": 100 + 
}, + + "pagination": { + "maxTotalHits": 10000 + }, + + "separatorTokens": [ + ".", ",", ";", ":", "!", "?", "-", "_", "/", "\\", "|" + ], + + "nonSeparatorTokens": [] + }, + + "documentStructure": { + "description": "Expected structure for indexed documents", + "schema": { + "id": "string (required) - Format: page_{docId}_p{pageNum}", + "vertical": "string (required) - boating | marina | property", + + "organizationId": "string (required) - Organization UUID", + "organizationName": "string (required) - Organization display name", + + "entityId": "string (required) - Entity UUID (boat, marina, condo)", + "entityName": "string (required) - Entity display name", + "entityType": "string (required) - boat | marina | condo", + + "subEntityId": "string (optional) - System, dock, or unit UUID", + "subEntityName": "string (optional) - Sub-entity name", + + "componentId": "string (optional) - Component UUID", + "componentName": "string (optional) - Component name", + + "docId": "string (required) - Document UUID", + "userId": "string (required) - Owner/uploader UUID", + + "documentType": "string (required) - manual | component-manual | service-record | inspection | certificate", + "title": "string (required) - Page or section title", + "pageNumber": "number (required) - 1-based page index", + "text": "string (required) - Full OCR extracted text", + + "systems": "array (optional) - electrical, plumbing, navigation, etc", + "categories": "array (optional) - maintenance, troubleshooting, safety, etc", + "tags": "array (optional) - bilge, pump, generator, etc", + + "boatName": "string (optional) - Boat name for display", + "boatMake": "string (optional) - Prestige, Beneteau, etc", + "boatModel": "string (optional) - F4.9, Oceanis 45, etc", + "boatYear": "number (optional) - 2024", + "vesselType": "string (optional) - powerboat | sailboat | catamaran | trawler", + + "manufacturer": "string (optional) - Component manufacturer", + "modelNumber": "string (optional) - Component model", +
"serialNumber": "string (optional) - Component serial", + + "language": "string (required) - en | fr | es | de", + "ocrConfidence": "number (optional) - 0.0 to 1.0", + + "priority": "string (optional) - critical | normal | reference", + "offlineCache": "boolean (optional) - Should be cached offline", + + "complianceType": "string (optional) - electrical-inspection | fire-safety | ada-compliance", + "inspectionDate": "number (optional) - Unix timestamp", + "nextDue": "number (optional) - Unix timestamp", + "status": "string (optional) - compliant | pending | failed", + + "location": "object (optional) - Physical location metadata", + "location.building": "string (optional) - Dock 1, Building A", + "location.gps": "object (optional) - GPS coordinates", + "location.gps.lat": "number (optional) - Latitude", + "location.gps.lon": "number (optional) - Longitude", + + "createdAt": "number (required) - Unix timestamp", + "updatedAt": "number (required) - Unix timestamp", + + "embedding": "array (optional) - Future: 1536 float vector for semantic search" + } + }, + + "exampleDocument": { + "id": "page_doc_abc123_p7", + "vertical": "boating", + + "organizationId": "org_xyz789", + "organizationName": "Smith Family Boats", + + "entityId": "boat_prestige_f49_001", + "entityName": "Sea Breeze", + "entityType": "boat", + + "subEntityId": "system_plumbing_001", + "subEntityName": "Plumbing System", + + "componentId": "comp_webasto_heater_001", + "componentName": "Webasto Water Heater", + + "docId": "doc_abc123", + "userId": "user_456", + + "documentType": "component-manual", + "title": "8.7 Blackwater System - Maintenance", + "pageNumber": 7, + "text": "The blackwater pump is located in the aft compartment beneath the master berth. 
To access the pump, remove the inspection panel located...", + + "systems": ["plumbing", "waste-management"], + "categories": ["maintenance", "troubleshooting"], + "tags": ["bilge", "pump", "blackwater", "waste"], + + "boatName": "Sea Breeze", + "boatMake": "Prestige", + "boatModel": "F4.9", + "boatYear": 2024, + "vesselType": "powerboat", + + "manufacturer": "Webasto", + "modelNumber": "FCF Platinum Series", + "serialNumber": "WB-2024-12345", + + "language": "en", + "ocrConfidence": 0.94, + + "priority": "normal", + "offlineCache": true, + + "createdAt": 1740234567, + "updatedAt": 1740234567, + + "embedding": null + }, + + "tenantTokenConfig": { + "description": "Tenant tokens for secure client-side search", + "expiresIn": 3600, + "searchRules": { + "navidocs-pages": { + "filter": "userId = {{userId}} OR organizationId IN {{organizationIds}}" + } + } + }, + + "notes": { + "masterKey": "NEVER expose master key to client! Use tenant tokens only.", + "indexUpdates": "Synonyms can be updated without re-indexing", + "futureFeatures": [ + "Vector search with pgvector or Qdrant integration", + "Multi-language indexes (separate per language)", + "Geo-search for property/marina vertical", + "Faceted search UI" + ] + } +} diff --git a/docs/roadmap/2-week-launch-plan.md b/docs/roadmap/2-week-launch-plan.md new file mode 100644 index 0000000..f7dba91 --- /dev/null +++ b/docs/roadmap/2-week-launch-plan.md @@ -0,0 +1,337 @@ +# BoatVault Launch: 2-Week Execution Plan + +**Goal:** Hardened MVP ready for beta users + +--- + +## Week 1: Infrastructure & Security + +### Monday - Queue & Worker Infrastructure +- [ ] **Morning:** Choose queue system (Redis available? 
→ BullMQ, else → SQLite queue) +- [ ] **Afternoon:** Implement queue wrapper + basic worker +- [ ] **EOD:** Test: Upload dummy PDF → job queued → worker processes → completes + - Acceptance: Job status endpoint returns progress + +**Deliverable:** Working background processing + +--- + +### Tuesday - File Safety Pipeline +- [ ] **Morning:** Install/verify `qpdf` and `ClamAV` on StackCP + ```bash + ssh stackcp "which qpdf || echo 'Need to install qpdf'" + ssh stackcp "which clamscan || echo 'Need to install clamav'" + ``` +- [ ] **Afternoon:** Implement `file-safety.js` middleware + - Extension validation + - Magic byte check + - qpdf sanitization + - ClamAV scan (warn if missing, don't fail) +- [ ] **EOD:** Test: Upload malformed PDF → rejected. Upload valid PDF → sanitized + +**Deliverable:** Upload endpoint refuses bad files + +--- + +### Wednesday - Gitea Upgrade +- [ ] **Morning:** Backup current Gitea (local) + ```bash + tar -czf ~/backups/gitea-pre-1.24-$(date +%Y%m%d).tar.gz ~/gitea/ + ``` +- [ ] **Afternoon:** Test upgrade on StackCP + - Download 1.24.0 + - Stop service → upgrade → start → verify +- [ ] **EOD:** Confirm version 1.24.0 running, all repos accessible + +**Deliverable:** Gitea upgraded, CVE-2024-45337 fixed + +--- + +### Thursday - Meilisearch Security +- [ ] **Morning:** Rotate Meilisearch master key + ```bash + # Generate new key + openssl rand -hex 32 + # Update .env on StackCP + # Restart Meilisearch + ``` +- [ ] **Afternoon:** Implement tenant token generation + - Backend endpoint: `/api/search/token` + - Returns scoped, time-limited token (1 hour TTL) +- [ ] **EOD:** Test: Frontend gets token → searches work → token expires → re-fetch + +**Deliverable:** Meilisearch master key never exposed to client + +--- + +### Friday - Health Checks & Monitoring +- [ ] **Morning:** Add `/health` endpoint to boat-docs API + - Check database, Meilisearch, queue +- [ ] **Afternoon:** Set up systemd health check timer + ```bash + systemctl --user enable 
boat-docs-healthcheck.timer + systemctl --user start boat-docs-healthcheck.timer + ``` +- [ ] **EOD:** Add external uptime monitor (UptimeRobot free tier) + +**Deliverable:** Automated health checks every 5 minutes + +--- + +## Week 2: MVP Features & Launch Prep + +### Monday - MVP Backend API +- [ ] **Morning:** Upload endpoint with safety pipeline + queue + ``` + POST /api/upload + → validate file + → sanitize + → queue OCR job + → return jobId + ``` +- [ ] **Afternoon:** Job status endpoint + ``` + GET /api/jobs/:jobId + → return progress, state, result + ``` +- [ ] **EOD:** OCR worker extracts text + indexes in Meilisearch + +**Deliverable:** End-to-end: Upload PDF → OCR → Searchable + +--- + +### Tuesday - Search & Retrieval +- [ ] **Morning:** Search endpoint with tenant tokens + ``` + POST /api/search + → verify auth + → generate tenant token + → forward to Meilisearch + ``` +- [ ] **Afternoon:** Document retrieval + ``` + GET /api/documents/:docId + → verify ownership + → return metadata + PDF URL + ``` +- [ ] **EOD:** Test: Search "electrical" → find relevant manual pages + +**Deliverable:** Working search with proper auth + +--- + +### Wednesday - Frontend MVP +- [ ] **Morning:** Upload UI (Vue.js component) + - File picker + - Progress bar (polls job status) + - Success/error handling +- [ ] **Afternoon:** Search UI + - Search bar + - Results list + - Highlight matches +- [ ] **EOD:** PDF viewer (pdf.js or simple `<embed>`) + +**Deliverable:** Working UI for upload → search → view + +--- + +### Thursday - Security Hardening +- [ ] **Morning:** Add helmet + security headers + ```javascript + app.use(helmet({ /* CSP config */ })); + ``` +- [ ] **Afternoon:** Implement rate limiting + - Upload: 10/hour + - Search: 30/minute + - API: 100/15min +- [ ] **EOD:** Test rate limits trigger correctly + +**Deliverable:** Production-grade security headers + +--- + +### Friday - Backups & Documentation +- [ ] **Morning:** Set up backup validation script + ```bash +
~/bin/validate-backups + # Add to cron: 0 3 1 * * + ``` +- [ ] **Afternoon:** Run restore drill + - Restore from last night's backup + - Verify SQLite integrity + - Verify Meilisearch index + - Document time-to-restore +- [ ] **EOD:** Write deployment runbook + - How to deploy updates + - How to rollback + - Emergency contacts + +**Deliverable:** Proven backup/restore process + +--- + +## Weekend - Soft Launch + +### Saturday - Beta Testing +- [ ] Deploy to production +- [ ] Invite 3-5 beta users (boat owners you know) +- [ ] Give them test manuals to upload +- [ ] Watch logs for errors + +### Sunday - Bug Fixes & Iteration +- [ ] Fix critical bugs found Saturday +- [ ] Gather feedback +- [ ] Plan v1.1 features based on usage + +--- + +## Success Criteria (Must-Have) + +- [x] Upload PDF → queued → OCR'd → searchable (< 5 min for 100-page manual) +- [x] Search returns relevant results in < 100ms +- [x] No master keys in client code +- [x] All uploads pass safety pipeline +- [x] Health checks report 200 OK +- [x] Backups restore successfully +- [x] Uptime monitor shows green +- [x] 3+ beta users successfully uploaded manuals + +--- + +## Nice-to-Have (v1.1+) + +- [ ] Multi-boat organization (user owns multiple boats) +- [ ] Share manual with crew +- [ ] OCR confidence scoring (highlight low-confidence text) +- [ ] Mobile-optimized UI +- [ ] Offline PWA mode +- [ ] Annotations on PDF pages +- [ ] Version history (updated manuals) + +--- + +## Daily Standup Questions + +Each morning ask yourself: + +1. **What did I ship yesterday?** (working code, not just "made progress") +2. **What am I shipping today?** (one specific deliverable) +3. 
**What's blocking me?** (missing tools, unclear requirements, bugs) + +--- + +## Risk Mitigation + +| Risk | Probability | Impact | Mitigation | +|------|-------------|--------|------------| +| StackCP CPU throttling during OCR | High | High | Queue throttles to 1 job at a time, add delays | +| qpdf/ClamAV not available | Medium | Medium | Install via SSH or skip with warning logs | +| Beta users find critical bug | Medium | High | Have rollback plan ready, feature flags | +| Meilisearch index corruption | Low | High | Daily dumps, test restores monthly | +| Shared hosting 502s under load | Low | Medium | Cloudflare CDN, rate limiting prevents abuse | + +--- + +## Post-Launch Monitoring (Week 3+) + +### Metrics to Track (Matomo) +- Uploads per day +- Search queries per day +- Average OCR processing time +- Failed uploads (% and reasons) +- Most searched terms (what manuals are missing?) + +### Alerts to Set Up +- Health check fails 3 times in a row → email +- Upload queue > 10 jobs → investigate slow processing +- Disk usage > 80% → cleanup old temp files +- Error rate > 5% of requests → investigate + +--- + +## Deployment Checklist (Before Each Deploy) + +- [ ] Run tests locally (when you write them) +- [ ] Backup current production state +- [ ] Deploy to staging (use subdomain: staging.digital-lab.ca) +- [ ] Smoke test on staging +- [ ] Deploy to production +- [ ] Verify health endpoint +- [ ] Test one upload end-to-end +- [ ] Monitor logs for 15 minutes +- [ ] Announce deploy in changelog (if user-facing changes) + +--- + +## When to Declare v1.0 "Done" + +- ✅ 10+ real boat manuals uploaded by beta users +- ✅ 100+ successful searches performed +- ✅ Zero critical bugs in last 3 days +- ✅ Backup restore tested and documented +- ✅ Uptime > 99.5% over 7 days +- ✅ Beta users say "I'd pay for this" + +**Then:** Open registration, announce on boating forums, iterate based on feedback. 
+ +--- + +## Budget Reality Check + +**Time Investment:** +- Week 1: 30-40 hours (infrastructure) +- Week 2: 30-40 hours (features) +- Ongoing: 5-10 hours/week (support, bug fixes, features) + +**Cost:** +- StackCP: $existing (no added cost) +- Domain: ~$12/year (navidocs.com or boatvault.com) +- ClamAV/qpdf: Free (open source) +- Redis: Free (if needed, small instance) +- Monitoring: Free (UptimeRobot free tier) + +**Total new cost: ~$12-15/year** + +--- + +## The "Oh Shit" Scenarios + +### Scenario 1: StackCP Bans OCR Workers +**Symptom:** Account suspended for CPU abuse +**Solution:** Move worker to $5/mo VPS, keep API on StackCP, communicate via queue + +### Scenario 2: Meilisearch Index Corrupted +**Symptom:** Search returns errors +**Solution:** +```bash +# Stop Meilisearch +systemctl --user stop meilisearch +# Restore from last dump +meilisearch --import-dump ~/backups/latest.dump +# Restart +systemctl --user start meilisearch +``` + +### Scenario 3: User Uploads 50GB of Manuals +**Symptom:** Disk space alert +**Solution:** +- Implement per-user quota (5GB default) +- Add disk usage endpoint +- Offer paid tiers for more storage + +### Scenario 4: Beta User Finds Their Manual Public +**Symptom:** Privacy breach report +**Solution:** +- Verify tenant tokens are working +- Check Meilisearch filters are applied +- Audit access logs +- If breach confirmed: + 1. Immediately delete document + 2. Rotate all tokens + 3. Email affected user + 4. Root cause analysis + +--- + +**Ship it. Learn from users. 
Iterate.** + diff --git a/docs/roadmap/v1.0-mvp.md b/docs/roadmap/v1.0-mvp.md new file mode 100644 index 0000000..5077af5 --- /dev/null +++ b/docs/roadmap/v1.0-mvp.md @@ -0,0 +1,153 @@ +# NaviDocs v1.0 MVP Roadmap + +**Goal:** Launch production-ready boat manual management platform +**Timeline:** 2 weeks intensive development +**Target:** Beta launch with 5-10 boat owners + +--- + +## Phase 1: Foundation (Week 1) + +### Day 1: Infrastructure Setup ✓ +- [x] Create NaviDocs repository +- [x] Expert panel debates completed +- [x] Schema design finalized +- [ ] Set up development environment +- [ ] Install dependencies (Node.js, Meilisearch, SQLite) + +### Day 2: Database & Queue System +- [ ] Implement SQLite schema (users, boats, documents, pages) +- [ ] Set up BullMQ or SQLite-based queue +- [ ] Create OCR job worker +- [ ] Test: Enqueue job → process → complete + +### Day 3: File Safety Pipeline +- [ ] Install qpdf and ClamAV +- [ ] Implement multi-layer validation + - Extension check + - Magic byte verification + - qpdf sanitization + - ClamAV malware scan +- [ ] Test: Upload malicious PDF → rejected + +### Day 4: Meilisearch Integration +- [ ] Configure Meilisearch index with settings + - Searchable attributes + - Filterable attributes + - Synonyms (boat terminology) +- [ ] Implement tenant token generation +- [ ] Create search service wrapper + +### Day 5: OCR Pipeline +- [ ] Implement Tesseract.js OCR extraction +- [ ] Page-by-page processing +- [ ] Index extracted text in Meilisearch +- [ ] Test: Upload PDF → OCR → searchable in < 5 min + +--- + +## Phase 2: Core Features (Week 2) + +### Day 6: Backend API +- [ ] POST /api/upload - with safety pipeline +- [ ] GET /api/jobs/:id - job status +- [ ] POST /api/search - with tenant tokens +- [ ] GET /api/documents/:id - retrieve document +- [ ] Helmet security headers +- [ ] Rate limiting + +### Day 7: Frontend Foundation +- [ ] Vue 3 + Vite setup +- [ ] Tailwind CSS configuration +- [ ] Meilisearch-inspired design 
system +- [ ] SVG icon library (clean, professional) +- [ ] Responsive layout + +### Day 8: Upload & Job Tracking +- [ ] File upload component +- [ ] Drag-and-drop support +- [ ] Progress bar (polls job status) +- [ ] Error handling and validation feedback +- [ ] Success state with document preview + +### Day 9: Search Interface +- [ ] Search bar with instant results +- [ ] Filters (system, category, boat) +- [ ] Result highlighting +- [ ] Pagination +- [ ] Sort options (relevance, date, page number) + +### Day 10: Document Viewer +- [ ] PDF.js integration +- [ ] Page navigation +- [ ] Search within document +- [ ] Highlight search terms +- [ ] Bookmarks (future: v1.1) + +--- + +## Phase 3: Polish & Launch (Weekend) + +### Day 11: Testing & Debugging +- [ ] Playwright end-to-end tests + - Upload flow + - Search flow + - Document viewing +- [ ] Cross-browser testing +- [ ] Mobile responsiveness +- [ ] Performance profiling + +### Day 12: Beta Launch +- [ ] Deploy to local environment +- [ ] Invite 5 beta testers +- [ ] Provide test manuals +- [ ] Monitor logs and usage +- [ ] Gather feedback + +--- + +## Success Criteria + +**Technical:** +- [ ] Upload PDF → searchable in < 5 minutes +- [ ] Search latency < 100ms +- [ ] Synonym search works ("bilge" finds "sump pump") +- [ ] All fields display correctly +- [ ] Offline mode functional (PWA) + +**User Experience:** +- [ ] Upload success rate > 95% +- [ ] Zero malicious files accepted +- [ ] Search relevance rated 4/5+ by users +- [ ] Mobile usable without zooming + +**Security:** +- [ ] No master keys in client code +- [ ] Tenant tokens expire after 1 hour +- [ ] Rate limits prevent abuse +- [ ] All PDFs sanitized with qpdf + +--- + +## Post-MVP Roadmap (v1.1+) + +### Planned Features +- [ ] Multi-boat support +- [ ] Share manuals with crew +- [ ] Bookmarks and annotations +- [ ] Service history tracking +- [ ] Maintenance reminders +- [ ] Shared component library +- [ ] Mobile apps (iOS/Android) +- [ ] Semantic 
search (embeddings) + +### Future Verticals +- [ ] Marina/property management +- [ ] Waterfront HOA documentation +- [ ] Yacht club member resources + +--- + +**Status:** In Development +**Last Updated:** 2025-01-19 +**Next Review:** After beta launch