This commit addresses multiple critical fixes and adds new functionality for the NaviDocs local testing environment (port 8083):

Search Fixes:
- Fixed search to use backend /api/search instead of direct Meilisearch
- Resolves network accessibility issue when accessing from external IPs
- Search now works from http://172.29.75.55:8083/search

PDF Text Selection:
- Added PDF.js text layer for selectable text
- Imported pdf_viewer.css for proper text layer styling
- Changed text layer opacity to 1 for better interaction
- Added user-select: text for improved text selection
- Pink selection highlight (rgba(255, 92, 178, 0.3))

Database Cleanup:
- Created cleanup scripts to remove 20 duplicate documents
- Removed 753 orphaned entries from Meilisearch index
- Cleaned 17 document folders from filesystem
- Kept only newest version of each document
- Scripts: clean-duplicates.js, clean-meilisearch-orphans.js

Auto-Fill Feature:
- New /api/upload/quick-ocr endpoint for first-page OCR
- Automatically extracts metadata from PDFs on file selection
- Detects: boat make, model, year, name, and document title
- Checks both OCR text and filename for boat name
- Auto-fills upload form with extracted data
- Shows loading indicator during metadata extraction
- Graceful fallback to filename if OCR fails

Tenant Management:
- Updated organization ID to use boat name as tenant
- Falls back to "Liliane 1" for single-tenant setup
- Each boat becomes a unique tenant in the system

Files Changed:
- client/src/views/DocumentView.vue - Text layer implementation
- client/src/composables/useSearch.js - Backend API integration
- client/src/components/UploadModal.vue - Auto-fill feature
- server/routes/quick-ocr.js - OCR endpoint (new)
- server/index.js - Route registration
- server/scripts/* - Cleanup utilities (new)

Testing: All features tested on local deployment at http://172.29.75.55:8083
- Backend: http://localhost:8001
- Frontend: http://localhost:8083
- Meilisearch: http://localhost:7700

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
392 lines
11 KiB
JavaScript
392 lines
11 KiB
JavaScript
#!/usr/bin/env node
|
||
/**
|
||
* End-to-End Test for Complete Image Extraction System
|
||
* Tests: Upload → OCR → Image Extraction → API → Frontend Integration
|
||
*/
|
||
|
||
import fetch from 'node-fetch';
|
||
import FormData from 'form-data';
|
||
import fs from 'fs';
|
||
import path from 'path';
|
||
import { fileURLToPath } from 'url';
|
||
import Database from 'better-sqlite3';
|
||
|
||
// ES modules have no built-in __filename/__dirname, so both are derived
// from this module's own URL.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Test configuration: backend base URL, SQLite file (resolved relative to
// this script), the organization the upload is filed under, and the PDF
// fixture that gets uploaded.
const API_URL = 'http://localhost:8001';
const DB_PATH = path.join(__dirname, 'db/navidocs.db');
const TEST_ORG_ID = 'test-org-123';
const TEST_PDF = path.join(__dirname, '../test/data/05-versions-space.pdf');

// Start-of-run banner followed by a 60-character rule.
const bannerRule = '='.repeat(60);
console.log('\n🧪 Starting Complete System E2E Test\n');
console.log(bannerRule);
|
||
|
||
/**
 * Suspend the calling async flow for a fixed amount of time.
 *
 * @param {number} ms - Delay in milliseconds handed to setTimeout.
 * @returns {Promise<void>} Settles with no value once the timer has fired.
 */
async function sleep(ms) {
  await new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
|
||
|
||
/**
 * Step 1: call the backend's /health endpoint and report whether it is up.
 *
 * The backend counts as healthy only when the HTTP response is OK and the
 * JSON body carries `status: 'ok'`. Any thrown error (network failure,
 * unparsable body) is logged as "not reachable".
 *
 * @returns {Promise<boolean>} true when healthy, false otherwise.
 */
async function testHealthCheck() {
  console.log('\n1️⃣ Testing Backend Health...');

  try {
    const healthResponse = await fetch(`${API_URL}/health`);
    const body = await healthResponse.json();
    const isHealthy = healthResponse.ok && body.status === 'ok';

    // Guard clause: bail out on the unhealthy case first.
    if (!isHealthy) {
      console.log(' ❌ Backend health check failed');
      return false;
    }

    console.log(' ✅ Backend is healthy');
    const uptimeSeconds = (body.uptime / 1000).toFixed(2);
    console.log(` 📊 Uptime: ${uptimeSeconds}s`);
    return true;
  } catch (failure) {
    console.log(` ❌ Backend not reachable: ${failure.message}`);
    return false;
  }
}
|
||
|
||
/**
 * Step 2: pick a PDF fixture and upload it.
 *
 * Uses TEST_PDF when it exists; otherwise falls back to
 * `test-docs/sample.pdf` next to this script. No PDF is generated here, so
 * the log message says that a fallback is being looked up rather than
 * claiming a file is being created.
 *
 * @returns {Promise<string|null>} Whatever testUploadFile() resolves to for
 *   the chosen fixture, or null when no fixture exists on disk.
 */
async function testUpload() {
  console.log('\n2️⃣ Testing PDF Upload...');

  // Preferred fixture is present: upload it directly.
  if (fs.existsSync(TEST_PDF)) {
    return testUploadFile(TEST_PDF);
  }

  console.log(` ⚠️ Sample PDF not found at ${TEST_PDF}`);
  console.log(' 📝 Looking for an alternative test PDF...');

  // Use a different test PDF if the sample doesn't exist.
  const alternativePdf = path.join(__dirname, 'test-docs/sample.pdf');
  if (fs.existsSync(alternativePdf)) {
    console.log(` ✅ Using alternative PDF: ${alternativePdf}`);
    return testUploadFile(alternativePdf);
  }

  console.log(' ❌ No test PDF available. Please create one.');
  return null;
}
|
||
|
||
/**
 * POST one PDF to the backend upload endpoint as multipart form data.
 *
 * The form carries the file stream plus the organization id, title,
 * document type and description used by this test run.
 *
 * @param {string} pdfPath - Path of the PDF to stream into the request.
 * @returns {Promise<string|null>} The `documentId` from the JSON response,
 *   or null when the response is not OK or any step throws.
 */
async function testUploadFile(pdfPath) {
  try {
    const uploadForm = new FormData();
    uploadForm.append('file', fs.createReadStream(pdfPath));

    // Plain text fields, appended in the same order after the file part.
    const textFields = [
      ['organizationId', TEST_ORG_ID],
      ['title', 'E2E Test Document'],
      ['documentType', 'owner-manual'],
      ['description', 'Testing image extraction system'],
    ];
    for (const [field, value] of textFields) {
      uploadForm.append(field, value);
    }

    const uploadResponse = await fetch(`${API_URL}/api/upload`, {
      method: 'POST',
      headers: uploadForm.getHeaders(),
      body: uploadForm,
    });

    if (uploadResponse.ok) {
      const payload = await uploadResponse.json();
      console.log(' ✅ PDF uploaded successfully');
      console.log(` 📄 Document ID: ${payload.documentId}`);
      console.log(` 📋 Job ID: ${payload.jobId}`);
      return payload.documentId;
    }

    const failureBody = await uploadResponse.text();
    console.log(` ❌ Upload failed: ${uploadResponse.status} ${failureBody}`);
    return null;
  } catch (uploadError) {
    console.log(` ❌ Upload error: ${uploadError.message}`);
    return null;
  }
}
|
||
|
||
/**
 * Step 3: poll the documents table until OCR processing finishes.
 *
 * Reads the document's `status` column every two seconds until it becomes
 * 'indexed' (success) or 'failed', the row cannot be found, or the time
 * budget runs out.
 *
 * The database handle is released in a `finally` block so it is closed on
 * every exit path, including when a query throws. The SELECT statement is
 * prepared once and reused for each poll.
 *
 * @param {string} documentId - Id of the document row to watch.
 * @param {number} [maxWaitSeconds=60] - Polling budget in seconds.
 * @returns {Promise<boolean>} true once the status is 'indexed'; false on
 *   'failed', missing row, or timeout.
 * @throws Propagates any error raised by the database driver.
 */
async function waitForOCRCompletion(documentId, maxWaitSeconds = 60) {
  console.log('\n3️⃣ Waiting for OCR Processing (including image extraction)...');

  const POLL_INTERVAL_MS = 2000;
  const db = new Database(DB_PATH);

  try {
    const statusQuery = db.prepare('SELECT status FROM documents WHERE id = ?');
    const startTime = Date.now();

    while ((Date.now() - startTime) / 1000 < maxWaitSeconds) {
      const doc = statusQuery.get(documentId);

      if (!doc) {
        console.log(' ❌ Document not found in database');
        return false;
      }

      console.log(` ⏳ Status: ${doc.status}`);

      if (doc.status === 'indexed') {
        console.log(' ✅ OCR processing complete!');
        return true;
      }

      if (doc.status === 'failed') {
        console.log(' ❌ OCR processing failed');
        return false;
      }

      await sleep(POLL_INTERVAL_MS); // Check every 2 seconds
    }

    console.log(' ⏱️ Timeout waiting for OCR completion');
    return false;
  } finally {
    db.close();
  }
}
|
||
|
||
/**
 * Step 4: inspect what image extraction stored for a document.
 *
 * Reads the document row and its `document_images` rows, logs a summary of
 * each image (page, size, path, OCR preview) and checks whether the image
 * file exists on disk relative to this script's parent directory.
 *
 * Changes from the earlier version:
 * - a missing document row is reported explicitly instead of surfacing as
 *   a "cannot read properties of undefined" error message;
 * - the database handle is closed in one `finally` block;
 * - the OCR preview only gets a trailing "..." when text was truncated.
 *
 * @param {string} documentId - Id of the document to inspect.
 * @returns {Promise<{success: boolean, imageCount: number, images?: object[]}>}
 *   `success` is false when the document is missing or a query throws;
 *   `images` is present only when at least one image row was found.
 */
async function testImageExtraction(documentId) {
  console.log('\n4️⃣ Testing Image Extraction Results...');

  const PREVIEW_LENGTH = 80;
  const db = new Database(DB_PATH);

  try {
    // Check document status
    const doc = db.prepare(`
      SELECT id, status, imagesExtracted, imageCount
      FROM documents
      WHERE id = ?
    `).get(documentId);

    if (!doc) {
      console.log(' ❌ Document not found in database');
      return { success: false, imageCount: 0 };
    }

    console.log(` 📊 Document Status: ${doc.status}`);
    console.log(` 🖼️ Images Extracted: ${doc.imagesExtracted ? 'Yes' : 'No'}`);
    console.log(` 📈 Image Count: ${doc.imageCount || 0}`);

    // Check extracted images
    const images = db.prepare(`
      SELECT id, pageNumber, imageIndex, extractedText, textConfidence,
             imagePath, width, height
      FROM document_images
      WHERE documentId = ?
      ORDER BY pageNumber, imageIndex
    `).all(documentId);

    if (images.length === 0) {
      console.log(' ⚠️ No images extracted (PDF may not contain images)');
      return { success: true, imageCount: 0 };
    }

    console.log(` ✅ Found ${images.length} extracted images`);

    for (const [index, img] of images.entries()) {
      console.log(`\n Image ${index + 1}:`);
      console.log(` Page: ${img.pageNumber}, Index: ${img.imageIndex}`);
      console.log(` Size: ${img.width}x${img.height}px`);
      console.log(` Path: ${img.imagePath}`);

      if (img.extractedText) {
        const text = img.extractedText;
        const textPreview = text.length > PREVIEW_LENGTH
          ? `${text.substring(0, PREVIEW_LENGTH)}...`
          : text;
        console.log(` OCR Text: "${textPreview}"`);
        console.log(` Confidence: ${(img.textConfidence * 100).toFixed(1)}%`);
      } else {
        console.log(` OCR Text: (empty)`);
      }

      // Check if image file exists
      const imagePath = path.join(__dirname, '../', img.imagePath);
      if (fs.existsSync(imagePath)) {
        const stats = fs.statSync(imagePath);
        console.log(` File Size: ${(stats.size / 1024).toFixed(1)} KB`);
      } else {
        console.log(` ⚠️ Image file not found: ${imagePath}`);
      }
    }

    return { success: true, imageCount: images.length, images };
  } catch (error) {
    console.log(` ❌ Error checking images: ${error.message}`);
    return { success: false, imageCount: 0 };
  } finally {
    db.close();
  }
}
|
||
|
||
/**
 * Step 5: exercise the image HTTP endpoints for a document.
 *
 * First lists the document's images via GET /api/documents/:id/images,
 * then downloads the first listed image via GET /api/images/:imageId and
 * logs its content type and size.
 *
 * The image body is read with the standard `arrayBuffer()` method instead
 * of `buffer()`, which only exists on node-fetch responses and is
 * deprecated there. A list response without an `images` array is reported
 * explicitly as a failure.
 *
 * @param {string} documentId - Id of the document whose images are listed.
 * @returns {Promise<boolean>} true when both requests succeed, or when the
 *   list is empty; false on any non-OK response, malformed payload, or
 *   thrown error.
 */
async function testImageAPI(documentId) {
  console.log('\n5️⃣ Testing Image API Endpoints...');

  try {
    // Test: Get all images for document
    console.log(' 📡 GET /api/documents/:id/images');
    const response = await fetch(`${API_URL}/api/documents/${documentId}/images`);

    if (!response.ok) {
      console.log(` ❌ API request failed: ${response.status}`);
      return false;
    }

    const data = await response.json();
    if (!Array.isArray(data?.images)) {
      console.log(' ❌ API response does not contain an images array');
      return false;
    }

    console.log(` ✅ API returned ${data.images.length} images`);

    if (data.images.length === 0) {
      console.log(' ⚠️ No images in API response');
      return true; // Not an error, PDF just doesn't have images
    }

    // Test: Get specific image file
    const [firstImage] = data.images;
    console.log(`\n 📡 GET /api/images/${firstImage.id}`);
    const imageResponse = await fetch(`${API_URL}/api/images/${firstImage.id}`);

    if (!imageResponse.ok) {
      console.log(` ❌ Image file request failed: ${imageResponse.status}`);
      return false;
    }

    const contentType = imageResponse.headers.get('content-type');
    const body = await imageResponse.arrayBuffer();

    console.log(` ✅ Image file retrieved`);
    console.log(` Content-Type: ${contentType}`);
    console.log(` Size: ${(body.byteLength / 1024).toFixed(1)} KB`);

    return true;
  } catch (error) {
    console.log(` ❌ API test error: ${error.message}`);
    return false;
  }
}
|
||
|
||
/**
 * Step 6: check that text found in extracted images is searchable.
 *
 * Loads the document's image rows that have non-empty OCR text, takes the
 * first two words of the first row as the query, POSTs it to /api/search
 * for the test organization and counts hits whose `documentType` is
 * 'image'.
 *
 * The database handle is only needed for the initial query, so it is
 * closed exactly once in a `finally` right after that query; previously a
 * failing search request closed it a second time from the catch block.
 * The query words are split on any whitespace so that line breaks inside
 * the OCR text do not end up in the search string.
 *
 * @param {string} documentId - Id of the document whose images are checked.
 * @returns {Promise<boolean>} true when there is nothing to search for or
 *   the search request succeeds (regardless of hit count); false when the
 *   search request is not OK or any step throws.
 */
async function testMeilisearchIndexing(documentId) {
  console.log('\n6️⃣ Testing Meilisearch Image Indexing...');

  const db = new Database(DB_PATH);

  try {
    let images;
    try {
      images = db.prepare(`
        SELECT id, extractedText
        FROM document_images
        WHERE documentId = ? AND extractedText IS NOT NULL AND extractedText != ''
      `).all(documentId);
    } finally {
      db.close();
    }

    if (images.length === 0) {
      console.log(' ⚠️ No images with OCR text to search');
      return true;
    }

    console.log(` 🔍 Testing search for image text...`);

    // Pick the first two words from the first image's text
    const searchText = images[0].extractedText.trim().split(/\s+/).slice(0, 2).join(' ');
    console.log(` 🔎 Searching for: "${searchText}"`);

    const response = await fetch(`${API_URL}/api/search`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        q: searchText,
        organizationId: TEST_ORG_ID,
      }),
    });

    if (!response.ok) {
      console.log(` ⚠️ Search request failed: ${response.status}`);
      return false;
    }

    const results = await response.json();

    const imageResults = results.hits?.filter((hit) => hit.documentType === 'image') || [];
    console.log(` ✅ Found ${imageResults.length} image results`);

    if (imageResults.length > 0) {
      console.log(` 🎯 Image search is working!`);
      imageResults.forEach((result, idx) => {
        console.log(` Result ${idx + 1}: Page ${result.pageNumber}`);
      });
    }

    return true;
  } catch (error) {
    console.log(` ❌ Search test error: ${error.message}`);
    return false;
  }
}
|
||
|
||
/**
 * Step 7: remove the data this test run created.
 *
 * Deletes the document row (the inline note below says image rows are
 * expected to go with it via cascade — not verifiable from this file) and
 * removes the document's folder under `../uploads` if it exists.
 *
 * @param {string} documentId - Id of the document to delete.
 * @returns {Promise<boolean>} true when cleanup ran to the end, false when
 *   any step threw.
 */
async function testCleanup(documentId) {
  console.log('\n7️⃣ Cleaning up test data...');

  const connection = new Database(DB_PATH);
  let cleanedUp;

  try {
    // Delete document (cascade will delete images)
    const deletion = connection
      .prepare('DELETE FROM documents WHERE id = ?')
      .run(documentId);
    console.log(` 🗑️ Deleted ${deletion.changes} document(s)`);

    // Delete uploaded files
    const documentUploads = path.join(__dirname, '../uploads', documentId);
    const hasUploads = fs.existsSync(documentUploads);
    if (hasUploads) {
      fs.rmSync(documentUploads, { recursive: true });
      console.log(' 🗑️ Deleted uploaded files');
    }

    connection.close();
    console.log(' ✅ Cleanup complete');
    cleanedUp = true;
  } catch (failure) {
    console.log(` ❌ Cleanup error: ${failure.message}`);
    connection.close();
    cleanedUp = false;
  }

  return cleanedUp;
}
|
||
|
||
/**
 * Run the whole end-to-end sequence: health check, upload, OCR wait,
 * image-extraction inspection, image API and search checks, then cleanup.
 *
 * Health-check or upload failure aborts the run. The API and search checks
 * only run when at least one image was extracted. Cleanup happens after a
 * 10-second pause that gives the operator a chance to interrupt the script
 * and keep the test data.
 *
 * The outcome of every step is now tracked: if any step fails, the final
 * banner says so and `process.exitCode` is set to 1, so shells and CI can
 * detect the failure. Previously the step results were discarded and the
 * script always printed success and exited with code 0.
 *
 * @returns {Promise<void>} Never rejects; unexpected errors are logged.
 */
async function runFullTest() {
  try {
    let allPassed = true;

    // Test 1: Health Check
    const healthOk = await testHealthCheck();
    if (!healthOk) {
      console.log('\n❌ Backend is not healthy. Aborting tests.');
      process.exitCode = 1;
      return;
    }

    // Test 2: Upload
    const documentId = await testUpload();
    if (!documentId) {
      console.log('\n❌ Upload failed. Aborting tests.');
      process.exitCode = 1;
      return;
    }

    // Test 3: Wait for OCR
    const ocrComplete = await waitForOCRCompletion(documentId, 90);
    if (!ocrComplete) {
      console.log('\n⚠️ OCR did not complete in time. Continuing anyway...');
      allPassed = false;
    }

    // Test 4: Check Image Extraction
    const imageResult = await testImageExtraction(documentId);
    if (!imageResult.success) {
      allPassed = false;
    }

    // Tests 5 and 6 only make sense when there is at least one image.
    if (imageResult.imageCount > 0) {
      // Test 5: Test API Endpoints
      const apiOk = await testImageAPI(documentId);
      // Test 6: Test Meilisearch
      const searchOk = await testMeilisearchIndexing(documentId);
      if (!apiOk || !searchOk) {
        allPassed = false;
      }
    }

    // Test 7: Cleanup
    console.log('\n❓ Keep test data? (will auto-delete in 10s)');
    await sleep(10000);
    const cleanupOk = await testCleanup(documentId);
    if (!cleanupOk) {
      allPassed = false;
    }

    console.log('\n' + '='.repeat(60));
    if (allPassed) {
      console.log('✅ E2E Test Complete!');
    } else {
      console.log('❌ E2E Test Complete with failures (see log above)');
      process.exitCode = 1;
    }
    console.log('='.repeat(60) + '\n');
  } catch (error) {
    console.error('\n💥 Test suite error:', error);
    process.exitCode = 1;
  }
}
|
||
|
||
// Script entry point: start the suite as soon as the module is loaded.
// The returned promise is not awaited; runFullTest() wraps its whole body
// in try/catch and logs unexpected errors itself.
runFullTest();
|