## Backend (server/)

- Express 5 API with security middleware (helmet, rate limiting)
- SQLite database with WAL mode (schema from docs/architecture/)
- Meilisearch integration with tenant tokens
- BullMQ + Redis background job queue
- OCR pipeline with Tesseract.js
- File safety validation (extension, MIME, size)
- 4 API route modules: upload, jobs, search, documents

## Frontend (client/)

- Vue 3 with Composition API (`<script setup>`)
- Vite 5 build system with HMR
- Tailwind CSS (Meilisearch-inspired design)
- UploadModal with drag-and-drop
- FigureZoom component (ported from lilian1)
- Meilisearch search integration with tenant tokens
- Job polling composable
- Clean SVG icons (no emojis)

## Code Extraction

- ✅ manuals.js → UploadModal.vue, useJobPolling.js
- ✅ figure-zoom.js → FigureZoom.vue
- ✅ service-worker.js → client/public/service-worker.js (TODO)
- ✅ glossary.json → Merged into Meilisearch synonyms
- ❌ Discarded: quiz.js, persona.js, gamification.js (Frank-AI junk)

## Documentation

- Complete extraction plan in docs/analysis/
- README with quick start guide
- Architecture summary in docs/architecture/

## Build Status

- Server dependencies: ✅ Installed (234 packages)
- Client dependencies: ✅ Installed (160 packages)
- Client build: ✅ Successful (2.63s)

🤖 Generated with Claude Code

Co-Authored-By: Claude <noreply@anthropic.com>
376 lines
11 KiB
JavaScript
376 lines
11 KiB
JavaScript
/**
 * Search Service - Meilisearch indexing and search operations
 *
 * Features:
 * - Index document pages in Meilisearch
 * - Build proper document structure from schema
 * - Handle metadata enrichment
 * - Support multi-vertical indexing (boat, marina, property)
 */
|
|
|
|
import { getMeilisearchIndex } from '../config/meilisearch.js';
|
|
import { getDb } from '../config/db.js';
|
|
|
|
/**
 * Index a single document page in Meilisearch.
 *
 * Loads the owning document row (left-joined with its entity, sub-entity,
 * component and organization), builds the search document with
 * buildSearchDocument(), submits it to the Meilisearch index, and then
 * records the index time and Meilisearch document ID on the matching
 * document_pages row.
 *
 * @param {Object} pageData - Page data to index
 * @param {string} pageData.pageId - Document page ID (matched against document_pages.id)
 * @param {string} pageData.documentId - Document ID (matched against documents.id)
 * @param {number} pageData.pageNumber - Page number (1-based)
 * @param {string} pageData.text - OCR extracted text
 * @param {number} pageData.confidence - OCR confidence (0-1)
 * @returns {Promise<Object>} - `{ success: true, documentId, taskUid }`, where
 *   `documentId` is the Meilisearch document ID (not the database document ID)
 *   and `taskUid` is taken from the `index.addDocuments` result
 * @throws {Error} "Failed to index page: <reason>" for any failure, including a
 *   missing document; the underlying error is available as `error.cause`
 */
export async function indexDocumentPage(pageData) {
  try {
    const db = getDb();

    // Fetch full document and entity metadata
    const documentRow = db.prepare(`
      SELECT
        d.*,
        e.name as entity_name,
        e.entity_type,
        e.make as boat_make,
        e.model as boat_model,
        e.year as boat_year,
        e.vessel_type,
        e.property_type,
        se.name as sub_entity_name,
        c.name as component_name,
        c.manufacturer,
        c.model_number,
        c.serial_number,
        o.name as organization_name
      FROM documents d
      LEFT JOIN entities e ON d.entity_id = e.id
      LEFT JOIN sub_entities se ON d.sub_entity_id = se.id
      LEFT JOIN components c ON d.component_id = c.id
      LEFT JOIN organizations o ON d.organization_id = o.id
      WHERE d.id = ?
    `).get(pageData.documentId);

    if (!documentRow) {
      throw new Error(`Document not found: ${pageData.documentId}`);
    }

    // The metadata column holds JSON text; an empty/NULL column means "no metadata"
    const documentMetadata = documentRow.metadata ? JSON.parse(documentRow.metadata) : {};

    // Build Meilisearch document according to schema
    const searchDocument = buildSearchDocument(pageData, documentRow, documentMetadata);

    const index = await getMeilisearchIndex();
    const result = await index.addDocuments([searchDocument]);

    console.log(`Indexed page ${pageData.pageNumber} of document ${pageData.documentId}`);

    // Record when the page was submitted (Unix seconds) and under which search ID
    db.prepare(`
      UPDATE document_pages
      SET search_indexed_at = ?,
          meilisearch_id = ?
      WHERE id = ?
    `).run(
      Math.floor(Date.now() / 1000),
      searchDocument.id,
      pageData.pageId
    );

    return {
      success: true,
      documentId: searchDocument.id,
      taskUid: result.taskUid
    };
  } catch (error) {
    console.error('Error indexing document page:', error);
    // Keep the original error (stack, type) reachable for callers and logs
    throw new Error(`Failed to index page: ${error.message}`, { cause: error });
  }
}
|
|
|
|
/**
 * Build Meilisearch document structure from page data and metadata
 *
 * Follows schema defined in docs/architecture/meilisearch-config.json
 *
 * The search document ID is `page_<document.id>_p<pageNumber>`. Optional
 * groups of fields (sub-entity, component, categorization, vertical-specific,
 * priority/offline, compliance, location) are only added when their source
 * value is present.
 *
 * @param {Object} pageData - Page OCR data (`pageNumber`, `text`, `confidence` are read)
 * @param {Object} document - Document database record, including the joined
 *   entity / sub-entity / component / organization columns
 * @param {Object|null} [metadata] - Parsed document metadata; null, undefined
 *   or a non-object value is treated as "no metadata"
 * @returns {Object} - Meilisearch document
 */
function buildSearchDocument(pageData, document, metadata) {
  // JSON.parse of a stored "null" yields null; treat that like an empty object
  // instead of crashing on the first property read.
  const meta = metadata !== null && typeof metadata === 'object' ? metadata : {};

  // Unix timestamp in seconds
  const now = Math.floor(Date.now() / 1000);

  // Determine vertical based on entity type
  const vertical = getVerticalFromEntityType(document.entity_type);

  // Categorization values may be stored as a single value or a list
  const toArray = (value) => (Array.isArray(value) ? value : [value]);

  // Base document structure
  const searchDoc = {
    // Required fields
    id: `page_${document.id}_p${pageData.pageNumber}`,
    vertical,

    organizationId: document.organization_id,
    organizationName: document.organization_name || 'Unknown Organization',

    entityId: document.entity_id || 'unknown',
    entityName: document.entity_name || 'Unknown Entity',
    entityType: document.entity_type || 'unknown',

    docId: document.id,
    userId: document.uploaded_by,

    documentType: document.document_type || 'manual',
    title: meta.title || document.title || `Page ${pageData.pageNumber}`,
    pageNumber: pageData.pageNumber,
    text: pageData.text,

    language: document.language || 'en',
    ocrConfidence: pageData.confidence,

    createdAt: document.created_at,
    updatedAt: now
  };

  // Optional: Sub-entity (system, dock, unit)
  if (document.sub_entity_id) {
    searchDoc.subEntityId = document.sub_entity_id;
    searchDoc.subEntityName = document.sub_entity_name;
  }

  // Optional: Component
  if (document.component_id) {
    searchDoc.componentId = document.component_id;
    searchDoc.componentName = document.component_name;
    searchDoc.manufacturer = document.manufacturer;
    searchDoc.modelNumber = document.model_number;
    searchDoc.serialNumber = document.serial_number;
  }

  // Optional: Categorization (always emitted as arrays)
  if (meta.systems) searchDoc.systems = toArray(meta.systems);
  if (meta.categories) searchDoc.categories = toArray(meta.categories);
  if (meta.tags) searchDoc.tags = toArray(meta.tags);

  // Boating vertical fields
  if (vertical === 'boating') {
    searchDoc.boatName = document.entity_name;
    if (document.boat_make) searchDoc.boatMake = document.boat_make;
    if (document.boat_model) searchDoc.boatModel = document.boat_model;
    if (document.boat_year) searchDoc.boatYear = document.boat_year;
    if (document.vessel_type) searchDoc.vesselType = document.vessel_type;
  }

  // Property/Marina vertical fields
  if (vertical === 'property' || vertical === 'marina') {
    if (document.property_type) searchDoc.propertyType = document.property_type;
    // NOTE(review): facility_type is only present if the caller's row includes
    // such a column — confirm against the documents/entities schema.
    if (document.facility_type) searchDoc.facilityType = document.facility_type;
  }

  // Optional: Priority and offline caching
  if (meta.priority) {
    searchDoc.priority = meta.priority;
  }
  // offlineCache is copied even when false, so an explicit opt-out is preserved
  if (meta.offlineCache !== undefined) {
    searchDoc.offlineCache = meta.offlineCache;
  }

  // Optional: Compliance/Inspection data
  if (meta.complianceType) searchDoc.complianceType = meta.complianceType;
  if (meta.inspectionDate) searchDoc.inspectionDate = meta.inspectionDate;
  if (meta.nextDue) searchDoc.nextDue = meta.nextDue;
  if (meta.status) searchDoc.status = meta.status;

  // Optional: Location data
  if (meta.location) {
    searchDoc.location = meta.location;
  }

  return searchDoc;
}
|
|
|
|
/**
 * Determine vertical from entity type
 *
 * Matching is case-insensitive. Missing, empty, non-string or unrecognised
 * entity types all resolve to the default vertical, 'boating'.
 *
 * @param {string} entityType - Entity type from database
 * @returns {string} - Vertical: 'boating', 'marina', 'property'
 */
function getVerticalFromEntityType(entityType) {
  const DEFAULT_VERTICAL = 'boating';

  // A non-string value has no toLowerCase(); fall back to the default
  // rather than throwing.
  if (typeof entityType !== 'string' || entityType === '') {
    return DEFAULT_VERTICAL;
  }

  switch (entityType.toLowerCase()) {
    case 'boat':
    case 'vessel':
      return 'boating';

    case 'marina':
    case 'yacht-club':
      return 'marina';

    case 'condo':
    case 'property':
    case 'building':
      return 'property';

    default:
      return DEFAULT_VERTICAL;
  }
}
|
|
|
|
/**
 * Bulk index multiple document pages
 *
 * Builds one search document per page via buildSearchDocument() and submits
 * them to Meilisearch in a single `addDocuments` call. The document lookup
 * statement is prepared once, and each distinct document ID is fetched and
 * its metadata parsed only once, however many of its pages are in the batch.
 *
 * Pages whose document cannot be found are skipped (a warning is logged) and
 * are not counted in the result.
 *
 * NOTE(review): unlike indexDocumentPage(), this function does not update
 * document_pages.search_indexed_at / meilisearch_id — confirm whether that is
 * intentional.
 *
 * @param {Array<Object>} pages - Array of page data objects (each needs
 *   `documentId` plus the fields buildSearchDocument() reads)
 * @returns {Promise<Object>} - `{ success: true, count, taskUid }`, where
 *   `count` is the number of pages actually submitted
 * @throws {Error} "Bulk indexing failed: <reason>"; the underlying error is
 *   available as `error.cause`
 */
export async function bulkIndexPages(pages) {
  try {
    const db = getDb();

    // Prepared once: the statement does not depend on the page being processed
    const documentLookup = db.prepare(`
      SELECT
        d.*,
        e.name as entity_name,
        e.entity_type,
        e.make as boat_make,
        e.model as boat_model,
        e.year as boat_year,
        e.vessel_type,
        e.property_type,
        se.name as sub_entity_name,
        c.name as component_name,
        c.manufacturer,
        c.model_number,
        c.serial_number,
        o.name as organization_name
      FROM documents d
      LEFT JOIN entities e ON d.entity_id = e.id
      LEFT JOIN sub_entities se ON d.sub_entity_id = se.id
      LEFT JOIN components c ON d.component_id = c.id
      LEFT JOIN organizations o ON d.organization_id = o.id
      WHERE d.id = ?
    `);

    // documentId -> { row, metadata } (or null when the document does not exist)
    const documentCache = new Map();
    const searchDocuments = [];
    let skippedPages = 0;

    for (const pageData of pages) {
      const { documentId } = pageData;

      if (!documentCache.has(documentId)) {
        const row = documentLookup.get(documentId);
        documentCache.set(
          documentId,
          row ? { row, metadata: row.metadata ? JSON.parse(row.metadata) : {} } : null
        );
      }

      const cached = documentCache.get(documentId);
      if (!cached) {
        skippedPages += 1;
        continue;
      }

      searchDocuments.push(buildSearchDocument(pageData, cached.row, cached.metadata));
    }

    if (skippedPages > 0) {
      console.warn(`Skipped ${skippedPages} pages with no matching document during bulk indexing`);
    }

    // Bulk add to Meilisearch
    const index = await getMeilisearchIndex();
    const result = await index.addDocuments(searchDocuments);

    console.log(`Bulk indexed ${searchDocuments.length} pages`);

    return {
      success: true,
      count: searchDocuments.length,
      taskUid: result.taskUid
    };
  } catch (error) {
    console.error('Error bulk indexing pages:', error);
    // Keep the original error (stack, type) reachable for callers and logs
    throw new Error(`Bulk indexing failed: ${error.message}`, { cause: error });
  }
}
|
|
|
|
/**
 * Remove a document page from search index
 *
 * The Meilisearch document ID is rebuilt as `page_<documentId>_p<pageNumber>`,
 * the same format buildSearchDocument() uses when indexing.
 *
 * @param {string} documentId - Document ID
 * @param {number} pageNumber - Page number
 * @returns {Promise<Object>} - `{ success: true, taskUid }`, with `taskUid`
 *   taken from the `index.deleteDocument` result
 * @throws {Error} "Failed to remove page: <reason>"; the underlying error is
 *   available as `error.cause`
 */
export async function removePageFromIndex(documentId, pageNumber) {
  try {
    const meilisearchId = `page_${documentId}_p${pageNumber}`;

    const index = await getMeilisearchIndex();
    const result = await index.deleteDocument(meilisearchId);

    console.log(`Removed page ${pageNumber} of document ${documentId} from index`);

    return {
      success: true,
      taskUid: result.taskUid
    };
  } catch (error) {
    console.error('Error removing page from index:', error);
    // Keep the original error (stack, type) reachable for callers and logs
    throw new Error(`Failed to remove page: ${error.message}`, { cause: error });
  }
}
|
|
|
|
/**
 * Remove all pages of a document from search index
 *
 * Issues a filtered delete on the `docId` attribute. The document ID is
 * embedded in the filter as a double-quoted string, so backslashes and double
 * quotes in it are escaped first; otherwise such an ID could terminate the
 * string early and change which documents the filter matches.
 *
 * @param {string} documentId - Document ID
 * @returns {Promise<Object>} - `{ success: true, taskUid }`, with `taskUid`
 *   taken from the `index.deleteDocuments` result
 * @throws {Error} "Failed to remove document: <reason>"; the underlying error
 *   is available as `error.cause`
 */
export async function removeDocumentFromIndex(documentId) {
  try {
    const index = await getMeilisearchIndex();

    // Escape `\` and `"` so the value stays inside the quoted filter string
    const quotedId = String(documentId).replace(/[\\"]/g, '\\$&');

    // Delete all pages matching the document ID
    const result = await index.deleteDocuments({
      filter: `docId = "${quotedId}"`
    });

    console.log(`Removed all pages of document ${documentId} from index`);

    return {
      success: true,
      taskUid: result.taskUid
    };
  } catch (error) {
    console.error('Error removing document from index:', error);
    // Keep the original error (stack, type) reachable for callers and logs
    throw new Error(`Failed to remove document: ${error.message}`, { cause: error });
  }
}
|
|
|
|
/**
 * Search for pages
 *
 * Forwards the query to the Meilisearch index with pagination and, when
 * provided, filter and sort options.
 *
 * @param {string} query - Search query
 * @param {Object} [options] - Search options; null is treated like `{}`
 * @param {number} [options.limit=20] - Maximum number of hits; an explicit 0
 *   is passed through rather than replaced by the default
 * @param {number} [options.offset=0] - Number of hits to skip
 * @param {*} [options.filter] - Passed through to `index.search` as `filter` when truthy
 * @param {*} [options.sort] - Passed through to `index.search` as `sort` when truthy
 * @returns {Promise<Object>} - Search results exactly as returned by `index.search`
 * @throws {Error} "Search failed: <reason>"; the underlying error is available
 *   as `error.cause`
 */
export async function searchPages(query, options = {}) {
  try {
    const index = await getMeilisearchIndex();

    // The default parameter only covers undefined; guard against null as well
    const { limit, offset, filter, sort } = options ?? {};

    const searchOptions = {
      // `limit || 20` alone would turn a deliberate limit of 0 into 20
      limit: limit === 0 ? 0 : limit || 20,
      offset: offset || 0
    };

    // Add filters if provided
    if (filter) {
      searchOptions.filter = filter;
    }

    // Add sort if provided
    if (sort) {
      searchOptions.sort = sort;
    }

    return await index.search(query, searchOptions);
  } catch (error) {
    console.error('Error searching pages:', error);
    // Keep the original error (stack, type) reachable for callers and logs
    throw new Error(`Search failed: ${error.message}`, { cause: error });
  }
}
|