navidocs/server/services/search.js
ggq-admin 155a8c0305 feat: NaviDocs MVP - Complete codebase extraction from lilian1
## Backend (server/)
- Express 5 API with security middleware (helmet, rate limiting)
- SQLite database with WAL mode (schema from docs/architecture/)
- Meilisearch integration with tenant tokens
- BullMQ + Redis background job queue
- OCR pipeline with Tesseract.js
- File safety validation (extension, MIME, size)
- 4 API route modules: upload, jobs, search, documents

## Frontend (client/)
- Vue 3 with Composition API (<script setup>)
- Vite 5 build system with HMR
- Tailwind CSS (Meilisearch-inspired design)
- UploadModal with drag-and-drop
- FigureZoom component (ported from lilian1)
- Meilisearch search integration with tenant tokens
- Job polling composable
- Clean SVG icons (no emojis)

## Code Extraction
- manuals.js → UploadModal.vue, useJobPolling.js
- figure-zoom.js → FigureZoom.vue
- service-worker.js → client/public/service-worker.js (TODO)
- glossary.json → Merged into Meilisearch synonyms
- Discarded: quiz.js, persona.js, gamification.js (Frank-AI junk)

## Documentation
- Complete extraction plan in docs/analysis/
- README with quick start guide
- Architecture summary in docs/architecture/

## Build Status
- Server dependencies: Installed (234 packages)
- Client dependencies: Installed (160 packages)
- Client build: Successful (2.63s)

🤖 Generated with Claude Code

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-19 01:55:44 +02:00

376 lines
11 KiB
JavaScript

/**
* Search Service - Meilisearch indexing and search operations
*
* Features:
* - Index document pages in Meilisearch
* - Build proper document structure from schema
* - Handle metadata enrichment
* - Support multi-vertical indexing (boating, marina, property)
*/
import { getMeilisearchIndex } from '../config/meilisearch.js';
import { getDb } from '../config/db.js';
/**
 * Index a single document page in Meilisearch.
 *
 * Loads the parent document row (left-joined with its entity, sub-entity,
 * component and organization), builds the search document with
 * buildSearchDocument(), submits it to the index, and then stamps the
 * document_pages row with the Meilisearch ID and the indexing time.
 *
 * @param {Object} pageData - Page data to index
 * @param {string} pageData.pageId - Document page ID (used to update document_pages)
 * @param {string} pageData.documentId - Document ID
 * @param {number} pageData.pageNumber - Page number (1-based)
 * @param {string} pageData.text - OCR extracted text
 * @param {number} pageData.confidence - OCR confidence (0-1)
 * @returns {Promise<Object>} - `{ success, documentId, taskUid }`; `documentId`
 *   is the Meilisearch document ID, `taskUid` comes from addDocuments()
 * @throws {Error} "Failed to index page: ..." on any failure (including an
 *   unknown document); the underlying error is attached as `cause`
 */
export async function indexDocumentPage(pageData) {
  try {
    const db = getDb();

    // Fetch full document and entity metadata
    const document = db.prepare(`
SELECT
d.*,
e.name as entity_name,
e.entity_type,
e.make as boat_make,
e.model as boat_model,
e.year as boat_year,
e.vessel_type,
e.property_type,
se.name as sub_entity_name,
c.name as component_name,
c.manufacturer,
c.model_number,
c.serial_number,
o.name as organization_name
FROM documents d
LEFT JOIN entities e ON d.entity_id = e.id
LEFT JOIN sub_entities se ON d.sub_entity_id = se.id
LEFT JOIN components c ON d.component_id = c.id
LEFT JOIN organizations o ON d.organization_id = o.id
WHERE d.id = ?
`).get(pageData.documentId);

    if (!document) {
      throw new Error(`Document not found: ${pageData.documentId}`);
    }

    // The metadata column holds JSON text; an empty/NULL column means "no metadata".
    const documentMetadata = document.metadata ? JSON.parse(document.metadata) : {};

    // Build Meilisearch document according to schema
    const searchDocument = buildSearchDocument(pageData, document, documentMetadata);

    const index = await getMeilisearchIndex();
    const result = await index.addDocuments([searchDocument]);
    console.log(`Indexed page ${pageData.pageNumber} of document ${pageData.documentId}`);

    // Record the Meilisearch ID and the indexing time (Unix seconds) on the page row.
    db.prepare(`
UPDATE document_pages
SET search_indexed_at = ?,
meilisearch_id = ?
WHERE id = ?
`).run(
      Math.floor(Date.now() / 1000),
      searchDocument.id,
      pageData.pageId
    );

    return {
      success: true,
      documentId: searchDocument.id,
      taskUid: result.taskUid
    };
  } catch (error) {
    console.error('Error indexing document page:', error);
    // Keep the original error (and its stack) reachable through `cause`.
    throw new Error(`Failed to index page: ${error.message}`, { cause: error });
  }
}
/**
 * Assemble the Meilisearch record for one page.
 *
 * Follows schema defined in docs/architecture/meilisearch-config.json
 *
 * The record always carries the core identification fields; sub-entity,
 * component, categorization, vertical-specific, priority/offline,
 * compliance and location fields are appended only when the corresponding
 * source value is present.
 *
 * @param {Object} pageData - Page OCR data (pageNumber, text, confidence)
 * @param {Object} document - Document database record, including joined columns
 * @param {Object} metadata - Parsed document metadata
 * @returns {Object} - Meilisearch document
 */
function buildSearchDocument(pageData, document, metadata) {
  const timestamp = Math.floor(Date.now() / 1000);
  const vertical = getVerticalFromEntityType(document.entity_type);
  const { pageNumber } = pageData;

  // Always-present fields; fallbacks apply when the source value is falsy.
  const record = {
    id: `page_${document.id}_p${pageNumber}`,
    vertical,
    organizationId: document.organization_id,
    organizationName: document.organization_name || 'Unknown Organization',
    entityId: document.entity_id || 'unknown',
    entityName: document.entity_name || 'Unknown Entity',
    entityType: document.entity_type || 'unknown',
    docId: document.id,
    userId: document.uploaded_by,
    documentType: document.document_type || 'manual',
    title: metadata.title || document.title || `Page ${pageNumber}`,
    pageNumber,
    text: pageData.text,
    language: document.language || 'en',
    ocrConfidence: pageData.confidence,
    createdAt: document.created_at,
    updatedAt: timestamp
  };

  // Copies source[from] onto record[to] for every pair whose value is truthy.
  const copyTruthy = (source, pairs) => {
    for (const [from, to] of pairs) {
      if (source[from]) {
        record[to] = source[from];
      }
    }
  };

  // Sub-entity (system, dock, unit): the name is copied as-is once an ID exists.
  if (document.sub_entity_id) {
    Object.assign(record, {
      subEntityId: document.sub_entity_id,
      subEntityName: document.sub_entity_name
    });
  }

  // Component: all descriptive columns are copied as-is once an ID exists.
  if (document.component_id) {
    Object.assign(record, {
      componentId: document.component_id,
      componentName: document.component_name,
      manufacturer: document.manufacturer,
      modelNumber: document.model_number,
      serialNumber: document.serial_number
    });
  }

  // Categorization: a lone value is wrapped so these fields are always arrays.
  for (const field of ['systems', 'categories', 'tags']) {
    const value = metadata[field];
    if (value) {
      record[field] = Array.isArray(value) ? value : [value];
    }
  }

  // Boating vertical fields
  if (vertical === 'boating') {
    record.boatName = document.entity_name;
    copyTruthy(document, [
      ['boat_make', 'boatMake'],
      ['boat_model', 'boatModel'],
      ['boat_year', 'boatYear'],
      ['vessel_type', 'vesselType']
    ]);
  }

  // Property/Marina vertical fields
  if (['property', 'marina'].includes(vertical)) {
    copyTruthy(document, [
      ['property_type', 'propertyType'],
      ['facility_type', 'facilityType']
    ]);
  }

  // Priority is copied when truthy; offlineCache whenever it is defined (so `false` survives).
  copyTruthy(metadata, [['priority', 'priority']]);
  if (metadata.offlineCache !== undefined) {
    record.offlineCache = metadata.offlineCache;
  }

  // Compliance/inspection data, then location.
  copyTruthy(metadata, [
    ['complianceType', 'complianceType'],
    ['inspectionDate', 'inspectionDate'],
    ['nextDue', 'nextDue'],
    ['status', 'status'],
    ['location', 'location']
  ]);

  return record;
}
/**
 * Map an entity type to the search vertical it belongs to.
 *
 * Matching is case-insensitive. A missing (falsy) or unrecognized entity
 * type falls back to 'boating'.
 *
 * @param {string} entityType - Entity type from database
 * @returns {string} - Vertical: 'boating', 'marina', 'property'
 */
function getVerticalFromEntityType(entityType) {
  if (!entityType) {
    return 'boating';
  }

  switch (entityType.toLowerCase()) {
    case 'marina':
    case 'yacht-club':
      return 'marina';

    case 'condo':
    case 'property':
    case 'building':
      return 'property';

    // 'boat', 'vessel' and anything unrecognized
    default:
      return 'boating';
  }
}
/**
 * Bulk index multiple document pages in one Meilisearch request.
 *
 * Each page's parent document row is looked up (with the same joins used
 * for single-page indexing) and turned into a search document via
 * buildSearchDocument(). The lookup statement is prepared once, and each
 * distinct document is fetched and its metadata parsed only once, no matter
 * how many of its pages are in the batch.
 *
 * Pages whose document cannot be found are skipped (with a warning) rather
 * than failing the whole batch.
 *
 * @param {Array<Object>} pages - Array of page data objects (each needs at
 *   least documentId, pageNumber, text and confidence)
 * @returns {Promise<Object>} - `{ success, count, taskUid }`; `count` is the
 *   number of pages actually submitted
 * @throws {Error} "Bulk indexing failed: ..." on any failure; the underlying
 *   error is attached as `cause`
 */
export async function bulkIndexPages(pages) {
  try {
    const db = getDb();

    // Prepared once up front; the statement is identical for every page.
    const documentQuery = db.prepare(`
SELECT
d.*,
e.name as entity_name,
e.entity_type,
e.make as boat_make,
e.model as boat_model,
e.year as boat_year,
e.vessel_type,
e.property_type,
se.name as sub_entity_name,
c.name as component_name,
c.manufacturer,
c.model_number,
c.serial_number,
o.name as organization_name
FROM documents d
LEFT JOIN entities e ON d.entity_id = e.id
LEFT JOIN sub_entities se ON d.sub_entity_id = se.id
LEFT JOIN components c ON d.component_id = c.id
LEFT JOIN organizations o ON d.organization_id = o.id
WHERE d.id = ?
`);

    // documentId -> { document, metadata }, or null when the document does not exist.
    const documentCache = new Map();
    const searchDocuments = [];

    for (const pageData of pages) {
      const { documentId } = pageData;

      if (!documentCache.has(documentId)) {
        const document = documentQuery.get(documentId);
        if (document) {
          documentCache.set(documentId, {
            document,
            metadata: document.metadata ? JSON.parse(document.metadata) : {}
          });
        } else {
          // Warn once per missing document instead of dropping its pages silently.
          console.warn(`Skipping pages of unknown document during bulk indexing: ${documentId}`);
          documentCache.set(documentId, null);
        }
      }

      const cached = documentCache.get(documentId);
      if (!cached) {
        continue;
      }

      searchDocuments.push(buildSearchDocument(pageData, cached.document, cached.metadata));
    }

    // Bulk add to Meilisearch
    const index = await getMeilisearchIndex();
    const result = await index.addDocuments(searchDocuments);
    console.log(`Bulk indexed ${searchDocuments.length} pages`);

    return {
      success: true,
      count: searchDocuments.length,
      taskUid: result.taskUid
    };
  } catch (error) {
    console.error('Error bulk indexing pages:', error);
    // Keep the original error (and its stack) reachable through `cause`.
    throw new Error(`Bulk indexing failed: ${error.message}`, { cause: error });
  }
}
/**
 * Remove a single document page from the search index.
 *
 * The Meilisearch document ID is derived as `page_<documentId>_p<pageNumber>`,
 * the same format used when pages are indexed.
 *
 * @param {string} documentId - Document ID
 * @param {number} pageNumber - Page number
 * @returns {Promise<Object>} - `{ success, taskUid }`; `taskUid` comes from deleteDocument()
 * @throws {Error} "Failed to remove page: ..." on any failure; the
 *   underlying error is attached as `cause`
 */
export async function removePageFromIndex(documentId, pageNumber) {
  try {
    const meilisearchId = `page_${documentId}_p${pageNumber}`;
    const index = await getMeilisearchIndex();
    const result = await index.deleteDocument(meilisearchId);
    console.log(`Removed page ${pageNumber} of document ${documentId} from index`);

    return {
      success: true,
      taskUid: result.taskUid
    };
  } catch (error) {
    console.error('Error removing page from index:', error);
    // Keep the original error (and its stack) reachable through `cause`.
    throw new Error(`Failed to remove page: ${error.message}`, { cause: error });
  }
}
/**
 * Remove all pages of a document from the search index.
 *
 * Deletes by filter on the `docId` attribute rather than by enumerating
 * page IDs.
 *
 * @param {string} documentId - Document ID
 * @returns {Promise<Object>} - `{ success, taskUid }`; `taskUid` comes from deleteDocuments()
 * @throws {Error} "Failed to remove document: ..." on any failure; the
 *   underlying error is attached as `cause`
 */
export async function removeDocumentFromIndex(documentId) {
  try {
    const index = await getMeilisearchIndex();

    // Backslash-escape `\` and `"` so the ID cannot terminate the quoted
    // literal and inject extra filter clauses. IDs without those characters
    // produce exactly the same filter string as before.
    // NOTE(review): confirm the escaping rules against the Meilisearch filter
    // expression reference for the deployed server version.
    const escapedId = String(documentId).replace(/[\\"]/g, '\\$&');

    // Delete all pages matching the document ID
    const result = await index.deleteDocuments({
      filter: `docId = "${escapedId}"`
    });
    console.log(`Removed all pages of document ${documentId} from index`);

    return {
      success: true,
      taskUid: result.taskUid
    };
  } catch (error) {
    console.error('Error removing document from index:', error);
    // Keep the original error (and its stack) reachable through `cause`.
    throw new Error(`Failed to remove document: ${error.message}`, { cause: error });
  }
}
/**
 * Search indexed pages.
 *
 * @param {string} query - Search query
 * @param {Object} [options] - Search options
 * @param {number} [options.limit] - Maximum hits to return (defaults to 20)
 * @param {number} [options.offset] - Number of hits to skip (defaults to 0)
 * @param {*} [options.filter] - Passed through to the index search as `filter` when truthy
 * @param {*} [options.sort] - Passed through to the index search as `sort` when truthy
 * @returns {Promise<Object>} - Search results exactly as returned by the index
 * @throws {Error} "Search failed: ..." on any failure; the underlying error
 *   is attached as `cause`
 */
export async function searchPages(query, options = {}) {
  try {
    const index = await getMeilisearchIndex();

    // `||` is intentional: any falsy limit/offset (undefined, 0, NaN from
    // upstream parsing) falls back to the default.
    const searchOptions = {
      limit: options.limit || 20,
      offset: options.offset || 0
    };

    // Only forward filter/sort when the caller supplied them.
    if (options.filter) {
      searchOptions.filter = options.filter;
    }
    if (options.sort) {
      searchOptions.sort = options.sort;
    }

    return await index.search(query, searchOptions);
  } catch (error) {
    console.error('Error searching pages:', error);
    // Keep the original error (and its stack) reachable through `cause`.
    throw new Error(`Search failed: ${error.message}`, { cause: error });
  }
}