navidocs/server/test-image-system-e2e.js
ggq-admin d461c5742f Fix search, add PDF text selection, clean duplicates, implement auto-fill
This commit addresses multiple critical fixes and adds new functionality
for the NaviDocs local testing environment (port 8083):

Search Fixes:
- Fixed search to use backend /api/search instead of direct Meilisearch
- Resolves network accessibility issue when accessing from external IPs
- Search now works from http://172.29.75.55:8083/search

PDF Text Selection:
- Added PDF.js text layer for selectable text
- Imported pdf_viewer.css for proper text layer styling
- Changed text layer opacity to 1 for better interaction
- Added user-select: text for improved text selection
- Pink selection highlight (rgba(255, 92, 178, 0.3))

Database Cleanup:
- Created cleanup scripts to remove 20 duplicate documents
- Removed 753 orphaned entries from Meilisearch index
- Cleaned 17 document folders from filesystem
- Kept only newest version of each document
- Scripts: clean-duplicates.js, clean-meilisearch-orphans.js

Auto-Fill Feature:
- New /api/upload/quick-ocr endpoint for first-page OCR
- Automatically extracts metadata from PDFs on file selection
- Detects: boat make, model, year, name, and document title
- Checks both OCR text and filename for boat name
- Auto-fills upload form with extracted data
- Shows loading indicator during metadata extraction
- Graceful fallback to filename if OCR fails

Tenant Management:
- Updated organization ID to use boat name as tenant
- Falls back to "Liliane 1" for single-tenant setup
- Each boat becomes a unique tenant in the system

Files Changed:
- client/src/views/DocumentView.vue - Text layer implementation
- client/src/composables/useSearch.js - Backend API integration
- client/src/components/UploadModal.vue - Auto-fill feature
- server/routes/quick-ocr.js - OCR endpoint (new)
- server/index.js - Route registration
- server/scripts/* - Cleanup utilities (new)

Testing:
All features tested on local deployment at http://172.29.75.55:8083
- Backend: http://localhost:8001
- Frontend: http://localhost:8083
- Meilisearch: http://localhost:7700

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-20 01:35:06 +02:00

392 lines
11 KiB
JavaScript
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env node
/**
* End-to-End Test for Complete Image Extraction System
* Tests: Upload → OCR → Image Extraction → API → Frontend Integration
*/
import fetch from 'node-fetch';
import FormData from 'form-data';
import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';
import Database from 'better-sqlite3';
// ES modules do not provide __filename/__dirname, so derive them from import.meta.url.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Backend base URL and the SQLite file (db/navidocs.db next to this script).
const API_URL = 'http://localhost:8001';
const DB_PATH = path.join(__dirname, 'db', 'navidocs.db');

// Test configuration: organization id sent with requests and the primary sample PDF.
const TEST_ORG_ID = 'test-org-123';
const TEST_PDF = path.join(__dirname, '..', 'test', 'data', '05-versions-space.pdf');

// Start-of-run banner.
console.log('\n🧪 Starting Complete System E2E Test\n');
console.log('='.repeat(60));
/**
 * Pause for the given duration.
 *
 * @param {number} ms - Delay in milliseconds.
 * @returns {Promise<void>} Settles with no value once the timer has fired.
 */
async function sleep(ms) {
  await new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
/**
 * Step 1: call GET {API_URL}/health and report whether the backend is up.
 *
 * The backend counts as healthy only when the HTTP response is OK and the
 * JSON body carries `status === 'ok'`.
 *
 * @returns {Promise<boolean>} true when healthy; false when unhealthy or when
 *   the request / JSON parsing throws.
 */
async function testHealthCheck() {
  console.log('\n1⃣ Testing Backend Health...');
  try {
    const res = await fetch(`${API_URL}/health`);
    const body = await res.json();

    // Guard clause: anything other than "HTTP OK + status ok" is a failure.
    if (!(res.ok && body.status === 'ok')) {
      console.log(' ❌ Backend health check failed');
      return false;
    }

    console.log(' ✅ Backend is healthy');
    console.log(` 📊 Uptime: ${(body.uptime / 1000).toFixed(2)}s`);
    return true;
  } catch (error) {
    console.log(` ❌ Backend not reachable: ${error.message}`);
    return false;
  }
}
/**
 * Step 2: pick a test PDF and upload it.
 *
 * Uses TEST_PDF when it exists; otherwise falls back to
 * `test-docs/sample.pdf` next to this script. No PDF is generated here, so
 * the log says "looking for", not "creating".
 *
 * @returns {Promise<string|null>} Whatever testUploadFile resolves to (the
 *   uploaded document id), or null when no test PDF is available.
 */
async function testUpload() {
  console.log('\n2⃣ Testing PDF Upload...');

  if (fs.existsSync(TEST_PDF)) {
    return testUploadFile(TEST_PDF);
  }

  console.log(` ⚠️ Sample PDF not found at ${TEST_PDF}`);
  // Use a different test PDF if the sample doesn't exist
  const alternativePdf = path.join(__dirname, 'test-docs/sample.pdf');
  console.log(` 📝 Looking for an alternative test PDF at ${alternativePdf}...`);

  if (fs.existsSync(alternativePdf)) {
    console.log(` ✅ Using alternative PDF: ${alternativePdf}`);
    return testUploadFile(alternativePdf);
  }

  console.log(' ❌ No test PDF available. Please create one.');
  return null;
}
/**
 * POST one PDF to {API_URL}/api/upload as multipart form data.
 *
 * @param {string} pdfPath - Path of the PDF to stream into the request.
 * @returns {Promise<string|null>} The `documentId` field of the JSON
 *   response, or null when the server answers with a non-OK status or the
 *   request throws.
 */
async function testUploadFile(pdfPath) {
  try {
    const form = new FormData();
    form.append('file', fs.createReadStream(pdfPath));

    // Plain text fields, appended in the same order the endpoint has always received them.
    const textFields = {
      organizationId: TEST_ORG_ID,
      title: 'E2E Test Document',
      documentType: 'owner-manual',
      description: 'Testing image extraction system',
    };
    for (const [fieldName, fieldValue] of Object.entries(textFields)) {
      form.append(fieldName, fieldValue);
    }

    const headers = form.getHeaders();
    const response = await fetch(`${API_URL}/api/upload`, { method: 'POST', body: form, headers });

    if (response.ok) {
      const data = await response.json();
      console.log(' ✅ PDF uploaded successfully');
      console.log(` 📄 Document ID: ${data.documentId}`);
      console.log(` 📋 Job ID: ${data.jobId}`);
      return data.documentId;
    }

    const error = await response.text();
    console.log(` ❌ Upload failed: ${response.status} ${error}`);
    return null;
  } catch (error) {
    console.log(` ❌ Upload error: ${error.message}`);
    return null;
  }
}
/**
 * Step 3: poll the documents table until OCR processing finishes.
 *
 * Reads `documents.status` for the given id every 2 seconds until it becomes
 * 'indexed' (success) or 'failed', the row disappears, or the time budget
 * runs out.
 *
 * @param {string} documentId - Id of the uploaded document to watch.
 * @param {number} [maxWaitSeconds=60] - Polling budget in seconds.
 * @returns {Promise<boolean>} true only when the status reached 'indexed'.
 * @throws Propagates database errors (e.g. from prepare/get); the connection
 *   is closed before the error reaches the caller.
 */
async function waitForOCRCompletion(documentId, maxWaitSeconds = 60) {
  console.log('\n3⃣ Waiting for OCR Processing (including image extraction)...');
  const db = new Database(DB_PATH);
  // A single finally replaces the per-branch close() calls, so the handle is
  // released on every exit path, including a throwing query.
  try {
    const startTime = Date.now();
    while ((Date.now() - startTime) / 1000 < maxWaitSeconds) {
      const doc = db.prepare('SELECT status FROM documents WHERE id = ?').get(documentId);
      if (!doc) {
        console.log(' ❌ Document not found in database');
        return false;
      }
      console.log(` ⏳ Status: ${doc.status}`);
      if (doc.status === 'indexed') {
        console.log(' ✅ OCR processing complete!');
        return true;
      }
      if (doc.status === 'failed') {
        console.log(' ❌ OCR processing failed');
        return false;
      }
      await sleep(2000); // Check every 2 seconds
    }
    console.log(' ⏱️ Timeout waiting for OCR completion');
    return false;
  } finally {
    db.close();
  }
}
/**
 * Step 4: inspect what image extraction wrote to the database and to disk.
 *
 * Prints the document's extraction flags, then lists every row of
 * `document_images` for the document (page, index, size, OCR preview) and
 * checks that each image file exists relative to the parent of this
 * script's directory.
 *
 * @param {string} documentId - Id of the uploaded document.
 * @returns {Promise<{success: boolean, imageCount: number, images?: object[]}>}
 *   `success` is false when the document row is missing or a query throws;
 *   `images` is present only when at least one image row was found.
 */
async function testImageExtraction(documentId) {
  console.log('\n4⃣ Testing Image Extraction Results...');
  const db = new Database(DB_PATH);
  try {
    // Check document status
    const doc = db.prepare(`
      SELECT id, status, imagesExtracted, imageCount
      FROM documents
      WHERE id = ?
    `).get(documentId);

    // Without this guard a missing row only surfaced as a TypeError message
    // about reading properties of undefined.
    if (!doc) {
      console.log(' ❌ Document not found in database');
      return { success: false, imageCount: 0 };
    }

    console.log(` 📊 Document Status: ${doc.status}`);
    console.log(` 🖼️ Images Extracted: ${doc.imagesExtracted ? 'Yes' : 'No'}`);
    console.log(` 📈 Image Count: ${doc.imageCount || 0}`);

    // Check extracted images
    const images = db.prepare(`
      SELECT id, pageNumber, imageIndex, extractedText, textConfidence,
             imagePath, width, height
      FROM document_images
      WHERE documentId = ?
      ORDER BY pageNumber, imageIndex
    `).all(documentId);

    if (images.length === 0) {
      console.log(' ⚠️ No images extracted (PDF may not contain images)');
      return { success: true, imageCount: 0 };
    }

    console.log(` ✅ Found ${images.length} extracted images`);
    images.forEach((img, index) => {
      console.log(`\n Image ${index + 1}:`);
      console.log(` Page: ${img.pageNumber}, Index: ${img.imageIndex}`);
      console.log(` Size: ${img.width}x${img.height}px`);
      console.log(` Path: ${img.imagePath}`);
      if (img.extractedText) {
        const textPreview = img.extractedText.substring(0, 80);
        console.log(` OCR Text: "${textPreview}..."`);
        console.log(` Confidence: ${(img.textConfidence * 100).toFixed(1)}%`);
      } else {
        console.log(` OCR Text: (empty)`);
      }

      // Check if image file exists
      const imagePath = path.join(__dirname, '../', img.imagePath);
      if (fs.existsSync(imagePath)) {
        const stats = fs.statSync(imagePath);
        console.log(` File Size: ${(stats.size / 1024).toFixed(1)} KB`);
      } else {
        console.log(` ⚠️ Image file not found: ${imagePath}`);
      }
    });

    return { success: true, imageCount: images.length, images };
  } catch (error) {
    console.log(` ❌ Error checking images: ${error.message}`);
    return { success: false, imageCount: 0 };
  } finally {
    // Single close for every exit path.
    db.close();
  }
}
/**
 * Step 5: exercise the image HTTP endpoints.
 *
 * Calls GET /api/documents/:id/images, then downloads the first listed image
 * through GET /api/images/:imageId and prints its content type and size.
 *
 * @param {string} documentId - Id of the uploaded document.
 * @returns {Promise<boolean>} true when both requests succeed (or the list is
 *   legitimately empty); false on a non-OK status, a malformed list response
 *   or a thrown error.
 */
async function testImageAPI(documentId) {
  console.log('\n5⃣ Testing Image API Endpoints...');
  try {
    // Test: Get all images for document
    console.log(' 📡 GET /api/documents/:id/images');
    const response = await fetch(`${API_URL}/api/documents/${documentId}/images`);
    if (!response.ok) {
      console.log(` ❌ API request failed: ${response.status}`);
      return false;
    }

    const data = await response.json();
    const images = data?.images;
    if (!Array.isArray(images)) {
      console.log(' ❌ API response does not contain an "images" array');
      return false;
    }

    console.log(` ✅ API returned ${images.length} images`);
    if (images.length === 0) {
      console.log(' ⚠️ No images in API response');
      return true; // Not an error, PDF just doesn't have images
    }

    // Test: Get specific image file
    const [firstImage] = images;
    console.log(`\n 📡 GET /api/images/${firstImage.id}`);
    const imageResponse = await fetch(`${API_URL}/api/images/${firstImage.id}`);
    if (!imageResponse.ok) {
      console.log(` ❌ Image file request failed: ${imageResponse.status}`);
      return false;
    }

    const contentType = imageResponse.headers.get('content-type');
    // arrayBuffer() is the standard Body method; buffer() is a node-fetch
    // extension that is deprecated in node-fetch v3 and absent from other
    // fetch implementations.
    const payload = await imageResponse.arrayBuffer();
    console.log(` ✅ Image file retrieved`);
    console.log(` Content-Type: ${contentType}`);
    console.log(` Size: ${(payload.byteLength / 1024).toFixed(1)} KB`);
    return true;
  } catch (error) {
    console.log(` ❌ API test error: ${error.message}`);
    return false;
  }
}
/**
 * Step 6: check that OCR text from extracted images is searchable.
 *
 * Reads the document's image rows that have OCR text, builds a query from
 * the first two words of the first non-blank text, POSTs it to
 * {API_URL}/api/search and reports how many hits have
 * `documentType === 'image'`.
 *
 * @param {string} documentId - Id of the uploaded document.
 * @returns {Promise<boolean>} true when there is nothing to search for or the
 *   search request succeeded (regardless of hit count); false on a non-OK
 *   search response or a thrown error.
 */
async function testMeilisearchIndexing(documentId) {
  console.log('\n6⃣ Testing Meilisearch Image Indexing...');
  const db = new Database(DB_PATH);
  try {
    let images;
    try {
      images = db.prepare(`
        SELECT id, extractedText
        FROM document_images
        WHERE documentId = ? AND extractedText IS NOT NULL AND extractedText != ''
      `).all(documentId);
    } finally {
      // The database is only needed for this one query; closing here (and
      // nowhere else) avoids a second close() when the search request fails.
      db.close();
    }

    // OCR output may start with, or consist only of, whitespace and line
    // breaks; skip blank texts so the query is made of real words.
    const sampleText = images
      .map((image) => String(image.extractedText).trim())
      .find((text) => text.length > 0);
    if (!sampleText) {
      console.log(' ⚠️ No images with OCR text to search');
      return true;
    }

    console.log(` 🔍 Testing search for image text...`);
    // Use the first two words of the first image's text
    const searchText = sampleText.split(/\s+/).slice(0, 2).join(' ');
    console.log(` 🔎 Searching for: "${searchText}"`);

    const response = await fetch(`${API_URL}/api/search`, {
      method: 'POST',
      headers: { 'Content-Type': 'application/json' },
      body: JSON.stringify({
        q: searchText,
        organizationId: TEST_ORG_ID,
      }),
    });
    if (!response.ok) {
      console.log(` ⚠️ Search request failed: ${response.status}`);
      return false;
    }

    const results = await response.json();
    const imageResults = results.hits?.filter((hit) => hit.documentType === 'image') || [];
    console.log(` ✅ Found ${imageResults.length} image results`);
    if (imageResults.length > 0) {
      console.log(` 🎯 Image search is working!`);
      imageResults.forEach((result, idx) => {
        console.log(` Result ${idx + 1}: Page ${result.pageNumber}`);
      });
    }
    return true;
  } catch (error) {
    console.log(` ❌ Search test error: ${error.message}`);
    return false;
  }
}
/**
 * Step 7: remove the test document's database row and uploaded files.
 *
 * @param {string} documentId - Id of the document to remove. It must resolve
 *   to a directory strictly inside `../uploads`; anything else (empty, `..`,
 *   an absolute path, null/undefined) is refused before any data is touched.
 * @returns {Promise<boolean>} true when cleanup finished; false when the id
 *   was refused or an error occurred.
 */
async function testCleanup(documentId) {
  console.log('\n7⃣ Cleaning up test data...');

  // Resolve the target first: an empty or traversing id would otherwise make
  // the recursive delete below hit the uploads root (or something above it).
  const uploadsRoot = path.resolve(__dirname, '../uploads');
  const uploadsDir = path.resolve(uploadsRoot, String(documentId ?? ''));
  if (!uploadsDir.startsWith(uploadsRoot + path.sep)) {
    console.log(` ❌ Cleanup error: refusing to clean up invalid document id "${documentId}"`);
    return false;
  }

  const db = new Database(DB_PATH);
  try {
    // Delete document (the image rows are expected to go with it via cascade —
    // NOTE(review): depends on the schema's foreign keys; confirm).
    const result = db.prepare('DELETE FROM documents WHERE id = ?').run(documentId);
    console.log(` 🗑️ Deleted ${result.changes} document(s)`);

    // Delete uploaded files
    if (fs.existsSync(uploadsDir)) {
      fs.rmSync(uploadsDir, { recursive: true });
      console.log(' 🗑️ Deleted uploaded files');
    }

    console.log(' ✅ Cleanup complete');
    return true;
  } catch (error) {
    console.log(` ❌ Cleanup error: ${error.message}`);
    return false;
  } finally {
    db.close();
  }
}
/**
 * Run the whole E2E sequence: health check → upload → OCR wait → image
 * extraction checks → image API → image search → cleanup.
 *
 * Every step's outcome is recorded. When any step fails (or the suite
 * throws) the closing banner says so and `process.exitCode` is set to 1, so
 * shells and CI can tell a failed run from a passing one. Once a document
 * has been uploaded, cleanup runs even if a later step throws.
 *
 * @returns {Promise<void>} Never rejects; errors are logged and reflected in
 *   the exit code.
 */
async function runFullTest() {
  const failedSteps = [];
  try {
    // Test 1: Health Check
    const healthOk = await testHealthCheck();
    if (!healthOk) {
      console.log('\n❌ Backend is not healthy. Aborting tests.');
      process.exitCode = 1;
      return;
    }

    // Test 2: Upload
    const documentId = await testUpload();
    if (!documentId) {
      console.log('\n❌ Upload failed. Aborting tests.');
      process.exitCode = 1;
      return;
    }

    try {
      // Test 3: Wait for OCR
      const ocrComplete = await waitForOCRCompletion(documentId, 90);
      if (!ocrComplete) {
        console.log('\n⚠ OCR did not complete in time. Continuing anyway...');
        failedSteps.push('OCR processing');
      }

      // Test 4: Check Image Extraction
      const imageResult = await testImageExtraction(documentId);
      if (!imageResult.success) {
        failedSteps.push('image extraction');
      }

      // Tests 5 and 6 only make sense when at least one image was extracted.
      if (imageResult.imageCount > 0) {
        // Test 5: Test API Endpoints
        if (!(await testImageAPI(documentId))) {
          failedSteps.push('image API');
        }
        // Test 6: Test Meilisearch
        if (!(await testMeilisearchIndexing(documentId))) {
          failedSteps.push('image search');
        }
      }
    } finally {
      // Test 7: Cleanup. No input is read here; the pause only gives the
      // operator time to interrupt the script before the data is deleted.
      console.log('\n❓ Keep test data? (will auto-delete in 10s)');
      await sleep(10000);
      if (!(await testCleanup(documentId))) {
        failedSteps.push('cleanup');
      }
    }

    console.log('\n' + '='.repeat(60));
    if (failedSteps.length === 0) {
      console.log('✅ E2E Test Complete!');
    } else {
      console.log(`❌ E2E Test finished with failures: ${failedSteps.join(', ')}`);
      process.exitCode = 1;
    }
    console.log('='.repeat(60) + '\n');
  } catch (error) {
    console.error('\n💥 Test suite error:', error);
    process.exitCode = 1;
  }
}
// Script entry point. runFullTest wraps its whole body in try/catch, so the
// returned promise is intentionally left un-awaited here.
runFullTest();