navidocs/server/examples/ocr-integration.js
ggq-admin 155a8c0305 feat: NaviDocs MVP - Complete codebase extraction from lilian1
## Backend (server/)
- Express 5 API with security middleware (helmet, rate limiting)
- SQLite database with WAL mode (schema from docs/architecture/)
- Meilisearch integration with tenant tokens
- BullMQ + Redis background job queue
- OCR pipeline with Tesseract.js
- File safety validation (extension, MIME, size)
- 4 API route modules: upload, jobs, search, documents

## Frontend (client/)
- Vue 3 with Composition API (<script setup>)
- Vite 5 build system with HMR
- Tailwind CSS (Meilisearch-inspired design)
- UploadModal with drag-and-drop
- FigureZoom component (ported from lilian1)
- Meilisearch search integration with tenant tokens
- Job polling composable
- Clean SVG icons (no emojis)

## Code Extraction
-  manuals.js → UploadModal.vue, useJobPolling.js
-  figure-zoom.js → FigureZoom.vue
-  service-worker.js → client/public/service-worker.js (TODO)
-  glossary.json → Merged into Meilisearch synonyms
-  Discarded: quiz.js, persona.js, gamification.js (Frank-AI junk)

## Documentation
- Complete extraction plan in docs/analysis/
- README with quick start guide
- Architecture summary in docs/architecture/

## Build Status
- Server dependencies:  Installed (234 packages)
- Client dependencies:  Installed (160 packages)
- Client build:  Successful (2.63s)

🤖 Generated with Claude Code

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-19 01:55:44 +02:00

291 lines
7.5 KiB
JavaScript

/**
* OCR Integration Example
*
* This example demonstrates the complete OCR pipeline workflow:
* 1. Upload a PDF document
* 2. Create OCR job in database
* 3. Queue job for background processing
* 4. Monitor job progress
* 5. Search indexed content
*
* Usage: node examples/ocr-integration.js
*/
import { createHash } from 'crypto';
import { createReadStream, statSync } from 'fs';
import { pathToFileURL } from 'url';
import { v4 as uuidv4 } from 'uuid';
import { getDb } from '../config/db.js';
import { addOcrJob, getJobStatus } from '../services/queue.js';
import { searchPages } from '../services/search.js';
/**
 * Example 1: Complete document upload and OCR workflow
 *
 * Hashes the sample PDF, inserts a `documents` row and a pending `ocr_jobs`
 * row, then hands the job to the background queue through `addOcrJob`.
 *
 * @returns {Promise<{documentId: string, jobId: string}>} IDs of the created
 *   document and OCR job records.
 * @throws If the sample file cannot be read, or a database/queue call fails.
 */
async function uploadAndProcessDocument() {
  console.log('=== Example 1: Upload and Process Document ===\n');
  const db = getDb();

  // Simulate uploaded file
  const filePath = './uploads/boat-manual.pdf';
  const fileStats = statSync(filePath);

  // Hash.update() accepts strings/Buffers/TypedArrays only; handing it the
  // ReadStream object itself throws a TypeError. Feed the stream's chunks
  // instead so the file is hashed without loading it fully into memory.
  const hasher = createHash('sha256');
  for await (const chunk of createReadStream(filePath)) {
    hasher.update(chunk);
  }
  const fileHash = hasher.digest('hex');

  // Create document record
  const documentId = uuidv4();
  const now = Math.floor(Date.now() / 1000); // Unix time in whole seconds
  db.prepare(`
INSERT INTO documents (
id, organization_id, entity_id, uploaded_by,
title, document_type, file_path, file_name,
file_size, file_hash, page_count,
status, created_at, updated_at
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 'processing', ?, ?)
`).run(
    documentId,
    'org_demo_123', // Organization ID
    'boat_demo_456', // Boat/Entity ID
    'user_demo_789', // User ID
    'Prestige F4.9 Owner Manual',
    'owner-manual',
    filePath,
    'boat-manual.pdf',
    fileStats.size,
    fileHash,
    50, // Page count (would be detected from PDF)
    now,
    now
  );
  console.log(`✓ Document created: ${documentId}`);

  // Create OCR job in database
  const jobId = uuidv4();
  db.prepare(`
INSERT INTO ocr_jobs (id, document_id, status, progress, created_at)
VALUES (?, ?, 'pending', 0, ?)
`).run(jobId, documentId, now);
  console.log(`✓ OCR job created: ${jobId}`);

  // Add job to BullMQ queue
  await addOcrJob(documentId, jobId, {
    filePath: filePath
  });
  console.log(`✓ Job queued for background processing`);

  return { documentId, jobId };
}
/**
 * Example 2: Monitor job progress
 *
 * Logs the BullMQ state once, then polls the `ocr_jobs` row until its status
 * is 'completed' or 'failed' (or the timeout elapses). The returned promise
 * settles only after polling stops, so `await monitorJobProgress(id)` really
 * waits for the job instead of returning while a timer keeps running.
 *
 * @param {string} jobId - ID used for both the `ocr_jobs` lookup and `getJobStatus`.
 * @param {object} [options]
 * @param {number} [options.intervalMs=2000] - Delay between database polls.
 * @param {number} [options.timeoutMs=600000] - Maximum time to keep polling
 *   before giving up with a warning.
 * @returns {Promise<object>} The last `ocr_jobs` row read (status, progress, error).
 * @throws {Error} If no `ocr_jobs` row exists for `jobId`.
 */
async function monitorJobProgress(jobId, { intervalMs = 2000, timeoutMs = 10 * 60 * 1000 } = {}) {
  console.log('\n=== Example 2: Monitor Job Progress ===\n');
  const db = getDb();

  // Also check BullMQ status
  const bullStatus = await getJobStatus(jobId);
  if (bullStatus) {
    console.log(`BullMQ State: ${bullStatus.state}`);
  }

  // Prepare once; the same statement is re-executed on every poll.
  const selectJob = db.prepare(`
SELECT status, progress, error FROM ocr_jobs WHERE id = ?
`);
  const deadline = Date.now() + timeoutMs;

  for (;;) {
    const job = selectJob.get(jobId);
    if (!job) {
      // Fail loudly instead of dereferencing `undefined` inside a timer callback.
      throw new Error(`OCR job not found: ${jobId}`);
    }

    console.log(`Status: ${job.status} | Progress: ${job.progress}%`);
    if (job.status === 'completed') {
      console.log('✓ OCR processing completed!');
      return job;
    }
    if (job.status === 'failed') {
      console.error(`✗ Job failed: ${job.error}`);
      return job;
    }
    if (Date.now() >= deadline) {
      // Best effort: report and let the caller continue rather than hang forever.
      console.warn(`Stopped waiting for job ${jobId} after ${timeoutMs}ms (last status: ${job.status})`);
      return job;
    }

    await new Promise((resolve) => setTimeout(resolve, intervalMs));
  }
}
/**
 * Example 3: Search indexed content
 *
 * Waits briefly to give indexing time to finish, then runs a fixed list of
 * sample queries through `searchPages`, restricted to one document, and logs
 * up to three hits per query.
 *
 * @param {string} documentId - Document whose pages should be searched.
 * @param {object} [options]
 * @param {number} [options.indexingDelayMs=5000] - How long to wait before
 *   searching; pass 0 to skip the wait.
 * @returns {Promise<void>}
 */
async function searchDocumentContent(documentId, { indexingDelayMs = 5000 } = {}) {
  console.log('\n=== Example 3: Search Document Content ===\n');

  // Wait for indexing to complete
  if (indexingDelayMs > 0) {
    await new Promise((resolve) => setTimeout(resolve, indexingDelayMs));
  }

  // Escape backslashes and double quotes so the ID cannot terminate the quoted
  // filter value and change the meaning of the filter expression.
  const quotedDocId = String(documentId).replace(/[\\"]/g, '\\$&');

  // Search for specific content
  const queries = [
    'bilge pump',
    'electrical system',
    'maintenance schedule',
    'safety equipment'
  ];

  for (const query of queries) {
    console.log(`\nSearching for: "${query}"`);
    const results = await searchPages(query, {
      filter: `docId = "${quotedDocId}"`,
      limit: 3
    });

    if (results.hits.length === 0) {
      console.log(' No matches found');
      continue;
    }

    console.log(`Found ${results.hits.length} matches:`);
    results.hits.forEach((hit, index) => {
      // Hits may lack OCR metadata or text; print a placeholder instead of
      // "NaN%" or throwing on `undefined.substring`.
      const confidence = Number.isFinite(hit.ocrConfidence)
        ? `${(hit.ocrConfidence * 100).toFixed(0)}%`
        : 'n/a';
      const snippet = String(hit.text ?? '').substring(0, 100);
      console.log(` ${index + 1}. Page ${hit.pageNumber} (confidence: ${confidence})`);
      console.log(` "${snippet}..."`);
    });
  }
}
/**
 * Example 4: Get document pages with OCR data
 *
 * Reads up to ten rows from `document_pages` for the given document, ordered
 * by page number, and prints each page's OCR confidence, text length and
 * whether it has a `search_indexed_at` value.
 *
 * @param {string} documentId - Value matched against `document_pages.document_id`.
 * @returns {Promise<void>}
 */
async function getDocumentPages(documentId) {
  console.log('\n=== Example 4: Get Document Pages ===\n');

  const pageQuery = getDb().prepare(`
SELECT
page_number,
ocr_confidence,
LENGTH(ocr_text) as text_length,
ocr_completed_at,
search_indexed_at
FROM document_pages
WHERE document_id = ?
ORDER BY page_number
LIMIT 10
`);
  const rows = pageQuery.all(documentId);

  console.log(`Document has ${rows.length} pages indexed:\n`);
  for (const row of rows) {
    console.log(`Page ${row.page_number}:`);
    // Confidence is stored as a fraction; show it as a whole percentage.
    const confidencePct = (row.ocr_confidence * 100).toFixed(0);
    console.log(` OCR Confidence: ${confidencePct}%`);
    console.log(` Text Length: ${row.text_length} characters`);
    const indexedMark = row.search_indexed_at ? '✓' : '✗';
    console.log(` Indexed: ${indexedMark}`);
  }
}
/**
 * Example 5: Multi-vertical search
 *
 * Runs three sample searches one after another (boating vertical, property
 * vertical, and a single organization) and logs the number of hits for each.
 *
 * @returns {Promise<void>}
 */
async function multiVerticalSearch() {
  console.log('\n=== Example 5: Multi-Vertical Search ===\n');

  // One entry per search; executed sequentially in this order.
  const searches = [
    // Search across all boat documents
    { label: 'Boat', query: 'engine maintenance', filter: 'vertical = "boating"', limit: 5 },
    // Search property/condo documents
    { label: 'Property', query: 'HVAC system', filter: 'vertical = "property"', limit: 5 },
    // Search by organization
    { label: 'Organization', query: 'safety', filter: 'organizationId = "org_demo_123"', limit: 10 }
  ];

  for (const { label, query, filter, limit } of searches) {
    const response = await searchPages(query, { filter, limit });
    console.log(`${label} documents: ${response.hits.length} results`);
  }
}
/**
 * Example 6: Advanced filtering and sorting
 *
 * Shows two `searchPages` calls: a keyword search combining several filter
 * conditions with a sort order, and an empty-query search that relies on the
 * filter alone. Logs the hit count of each.
 *
 * @returns {Promise<void>}
 */
async function advancedSearch() {
  console.log('\n=== Example 6: Advanced Search ===\n');

  // Search with multiple filters
  const plumbingConditions = [
    'vertical = "boating"',
    'systems IN ["plumbing", "waste-management"]',
    'ocrConfidence > 0.8'
  ];
  const plumbingOptions = {
    filter: plumbingConditions.join(' AND '),
    sort: ['pageNumber:asc'],
    limit: 10
  };
  const plumbingPages = await searchPages('pump', plumbingOptions);
  console.log(`Found ${plumbingPages.hits.length} high-confidence plumbing pages`);

  // Search by boat make/model
  const modelOptions = {
    filter: 'boatMake = "Prestige" AND boatModel = "F4.9"',
    limit: 20
  };
  const modelPages = await searchPages('', modelOptions);
  console.log(`Found ${modelPages.hits.length} Prestige F4.9 pages`);
}
/**
 * Run all examples
 *
 * Executes examples 1-6 in order, passing the IDs created by example 1 to the
 * later steps, then terminates the process: exit code 0 when every example
 * finished, 1 (after logging the error) when any of them threw.
 *
 * @returns {Promise<void>}
 */
async function runExamples() {
  let exitCode = 0;

  try {
    console.log('NaviDocs OCR Integration Examples\n');
    console.log('===================================\n');

    // Example 1: Upload and process
    const created = await uploadAndProcessDocument();
    const { documentId, jobId } = created;

    // Example 2: Monitor progress
    await monitorJobProgress(jobId);

    // Example 3: Search content
    await searchDocumentContent(documentId);

    // Example 4: Get pages
    await getDocumentPages(documentId);

    // Example 5: Multi-vertical search
    await multiVerticalSearch();

    // Example 6: Advanced search
    await advancedSearch();

    console.log('\n✅ All examples completed!\n');
  } catch (error) {
    console.error('Error running examples:', error);
    exitCode = 1;
  }

  // Exit explicitly in both outcomes rather than waiting for the event loop to drain.
  process.exit(exitCode);
}
// Run if executed directly.
// import.meta.url is a percent-encoded file URL, so compare it with
// pathToFileURL(argv[1]) instead of a hand-built `file://` string; the raw
// concatenation never matches on Windows (drive letters, backslashes) or for
// paths containing spaces or other characters that must be escaped.
if (process.argv[1] && import.meta.url === pathToFileURL(process.argv[1]).href) {
  runExamples();
}
// Export for use in other modules
export {
uploadAndProcessDocument,
monitorJobProgress,
searchDocumentContent,
getDocumentPages,
multiVerticalSearch,
advancedSearch
};