navidocs/server/scripts/test-ocr.js
ggq-admin 155a8c0305 feat: NaviDocs MVP - Complete codebase extraction from lilian1
## Backend (server/)
- Express 5 API with security middleware (helmet, rate limiting)
- SQLite database with WAL mode (schema from docs/architecture/)
- Meilisearch integration with tenant tokens
- BullMQ + Redis background job queue
- OCR pipeline with Tesseract.js
- File safety validation (extension, MIME, size)
- 4 API route modules: upload, jobs, search, documents

## Frontend (client/)
- Vue 3 with Composition API (<script setup>)
- Vite 5 build system with HMR
- Tailwind CSS (Meilisearch-inspired design)
- UploadModal with drag-and-drop
- FigureZoom component (ported from lilian1)
- Meilisearch search integration with tenant tokens
- Job polling composable
- Clean SVG icons (no emojis)

## Code Extraction
-  manuals.js → UploadModal.vue, useJobPolling.js
-  figure-zoom.js → FigureZoom.vue
-  service-worker.js → client/public/service-worker.js (TODO)
-  glossary.json → Merged into Meilisearch synonyms
-  Discarded: quiz.js, persona.js, gamification.js (Frank-AI junk)

## Documentation
- Complete extraction plan in docs/analysis/
- README with quick start guide
- Architecture summary in docs/architecture/

## Build Status
- Server dependencies: Installed (234 packages)
- Client dependencies: Installed (160 packages)
- Client build: Successful (2.63s)

🤖 Generated with Claude Code

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-19 01:55:44 +02:00

82 lines
2.7 KiB
JavaScript
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* Test script for OCR pipeline
*
* Usage: node scripts/test-ocr.js
*/
import { checkPDFTools } from '../services/ocr.js';
import { getMeilisearchIndex } from '../config/meilisearch.js';
import { getDb } from '../config/db.js';
/**
 * Print a status report for the external pieces the OCR pipeline depends on.
 *
 * Runs five checks in order: PDF conversion tools, Meilisearch, the
 * database, Redis, and the Tesseract CLI. Checks 2-5 are each wrapped in
 * their own try/catch, so a failure is printed and the next check still
 * runs. Check 1 calls `checkPDFTools()` directly, so anything it throws
 * propagates to the caller.
 *
 * @returns {Promise<void>} Resolves after the final summary line is printed.
 */
async function testOCRPipeline() {
  console.log('NaviDocs OCR Pipeline Test\n');

  // 1. Check PDF conversion tools
  console.log('1. Checking PDF conversion tools...');
  const tools = checkPDFTools();
  console.log(' - pdftoppm:', tools.pdftoppm ? '✓ Available' : '✗ Not found');
  console.log(' - ImageMagick:', tools.imagemagick ? '✓ Available' : '✗ Not found');
  if (!tools.pdftoppm && !tools.imagemagick) {
    console.log('\n⚠ Warning: No PDF conversion tools found!');
    console.log(' Install with: apt-get install poppler-utils imagemagick\n');
  }

  // 2. Check Meilisearch connection
  console.log('\n2. Checking Meilisearch connection...');
  try {
    const index = await getMeilisearchIndex();
    const stats = await index.getStats();
    console.log(` ✓ Connected to index: ${stats.numberOfDocuments} documents indexed`);
  } catch (error) {
    console.log(` ✗ Meilisearch error: ${error.message}`);
    console.log(' Make sure Meilisearch is running on port 7700');
  }

  // 3. Check database connection
  console.log('\n3. Checking database connection...');
  try {
    const db = getDb();
    const result = db.prepare('SELECT COUNT(*) as count FROM documents').get();
    console.log(` ✓ Database connected: ${result.count} documents found`);
  } catch (error) {
    console.log(` ✗ Database error: ${error.message}`);
  }

  // 4. Check Redis connection (for BullMQ)
  console.log('\n4. Checking Redis connection...');
  // Declared outside the try so the finally block can release the client
  // even when the ping (or the constructor) fails.
  let redis;
  try {
    // Loaded lazily so a missing ioredis package is reported as a failed
    // check instead of preventing the whole script from starting.
    const Redis = (await import('ioredis')).default;
    redis = new Redis({
      host: process.env.REDIS_HOST || '127.0.0.1',
      port: process.env.REDIS_PORT || 6379
    });
    await redis.ping();
    console.log(' ✓ Redis connected');
    await redis.quit();
  } catch (error) {
    console.log(` ✗ Redis error: ${error.message}`);
    console.log(' Start Redis with: docker run -d -p 6379:6379 redis:alpine');
  } finally {
    // On the failure path quit() is never reached. Without an explicit
    // disconnect the client stays open (ioredis reconnects automatically
    // by default), which keeps the process from exiting once the report is
    // done. Calling disconnect() after a successful quit() is harmless.
    redis?.disconnect();
  }

  // 5. Check Tesseract
  console.log('\n5. Checking Tesseract OCR...');
  try {
    const { execSync } = await import('child_process');
    const version = execSync('tesseract --version', { encoding: 'utf8' });
    console.log(' ✓ Tesseract installed');
    // Only the first line of the version banner is shown.
    console.log(` ${version.split('\n')[0]}`);
  } catch {
    // Any failure to run the binary is reported as "not found".
    console.log(' ✗ Tesseract not found');
    console.log(' Install with: apt-get install tesseract-ocr');
  }

  console.log('\n✅ OCR Pipeline Test Complete\n');
}
// Entry point: start the checks. A rejection that escapes testOCRPipeline
// is logged and turned into a non-zero exit code.
testOCRPipeline().then(undefined, (failure) => {
  console.error('Test failed:', failure);
  process.exit(1);
});