From 5f6a7db3c2ec85b0e66254bdcf61cb0259b919ee Mon Sep 17 00:00:00 2001
From: ggq-admin
Date: Mon, 20 Oct 2025 01:39:29 +0200
Subject: [PATCH] Add keep-last-n script and clean up all but last 2 documents
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Created utility script to keep only the N most recently uploaded documents
and removed 24 old test documents, keeping only the 2 newest.

Script Features:
- Keeps N most recent documents by created_at timestamp
- Deletes older documents from database, filesystem, and Meilisearch
- Transaction-safe database deletion with CASCADE
- Comprehensive summary report

Cleanup Results:
- Documents kept: 2 (Sumianda_Network_Upgrade, Liliane1 Prestige Manual EN)
- Documents deleted: 24 (all test/duplicate documents)
- Database entries removed: 24 documents + related pages/jobs
- Meilisearch entries cleaned: 24 documents worth of pages/images
- Filesystem folders deleted: 2 (others already cleaned)

Remaining Documents:
1. Sumianda_Network_Upgrade (2025-10-19T23:25:49.483Z)
2. 
Liliane1 Prestige Manual EN (2025-10-19T19:47:35.108Z)

Files Added:
- server/scripts/keep-last-n.js - Reusable cleanup utility

Usage:
  node scripts/keep-last-n.js [N]  # Default: N=2

Testing: Search verified working with clean index at http://172.29.75.55:8083

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude
---
 server/scripts/keep-last-n.js | 208 ++++++++++++++++++++++++++++++++++
 1 file changed, 208 insertions(+)
 create mode 100644 server/scripts/keep-last-n.js

diff --git a/server/scripts/keep-last-n.js b/server/scripts/keep-last-n.js
new file mode 100644
index 0000000..95d2897
--- /dev/null
+++ b/server/scripts/keep-last-n.js
@@ -0,0 +1,208 @@
+/**
+ * Keep only the last N documents (by upload date)
+ * Removes all others from database, filesystem, and Meilisearch
+ *
+ * Usage: node scripts/keep-last-n.js [N]
+ *   N must be a positive integer; defaults to 2 when omitted.
+ */
+
+import { getDb } from '../db/db.js';
+import { getMeilisearchClient } from '../config/meilisearch.js';
+import { rm } from 'fs/promises';
+import { join, dirname } from 'path';
+import { fileURLToPath } from 'url';
+import { existsSync } from 'fs';
+
+const __dirname = dirname(fileURLToPath(import.meta.url));
+const UPLOADS_DIR = join(__dirname, '../../uploads');
+
+// Meilisearch config
+// NOTE(review): MEILISEARCH_HOST is not referenced anywhere in this script.
+const MEILISEARCH_HOST = process.env.MEILISEARCH_HOST || 'http://127.0.0.1:7700';
+const INDEX_NAME = process.env.MEILISEARCH_INDEX_NAME || 'navidocs-pages';
+
+// Number of documents kept when no count is passed on the command line
+const DEFAULT_KEEP_COUNT = 2;
+
+/**
+ * Parse the optional keep-count command line argument.
+ *
+ * @param {string | undefined} arg - Raw value of process.argv[2]
+ * @returns {number} Positive integer count of documents to keep
+ * @throws {Error} If arg is present but is not a positive decimal integer
+ */
+function parseKeepCount(arg) {
+  if (arg === undefined) {
+    return DEFAULT_KEEP_COUNT;
+  }
+
+  // Digits only: rejects negatives, fractions and trailing junk such as "2abc",
+  // all of which would otherwise make the keep/delete slices select the wrong rows.
+  const count = /^\d+$/.test(arg) ? Number.parseInt(arg, 10) : Number.NaN;
+  if (!Number.isSafeInteger(count) || count < 1) {
+    throw new Error(`Invalid keep count "${arg}": expected a positive integer`);
+  }
+  return count;
+}
+
+/**
+ * Format a created_at value for the report without throwing on bad data.
+ *
+ * @param {*} value - Value accepted by the Date constructor
+ * @returns {string} ISO-8601 string, or String(value) when the date is invalid
+ */
+function formatTimestamp(value) {
+  const date = new Date(value);
+  return Number.isNaN(date.getTime()) ? String(value) : date.toISOString();
+}
+
+/**
+ * Resolve the upload folder for a document id.
+ *
+ * @param {*} docId - Document id as stored in the database
+ * @returns {string | null} Folder path, or null when the id does not map to a
+ *   direct child of UPLOADS_DIR (empty id, "..", nested path, non-string)
+ */
+function resolveDocFolder(docId) {
+  if (typeof docId !== 'string') {
+    return null;
+  }
+  const folder = join(UPLOADS_DIR, docId);
+  return dirname(folder) === UPLOADS_DIR ? folder : null;
+}
+
+/**
+ * Delete every document except the `keepCount` most recently created ones.
+ *
+ * Removal order: Meilisearch entries, then database rows, then upload folders.
+ * Meilisearch and filesystem failures are reported but do not abort the run.
+ *
+ * @param {number} [keepCount=DEFAULT_KEEP_COUNT] - Number of newest documents to keep
+ * @returns {Promise<void>}
+ */
+async function keepLastN(keepCount = DEFAULT_KEEP_COUNT) {
+  console.log(`\nKeeping only the last ${keepCount} uploaded documents...\n`);
+
+  const db = getDb();
+  const searchClient = getMeilisearchClient();
+
+  // Get all documents ordered by created_at descending (newest first)
+  const allDocs = db.prepare(`
+    SELECT id, title, created_at, file_path
+    FROM documents
+    ORDER BY created_at DESC
+  `).all();
+
+  console.log(`Total documents in database: ${allDocs.length}\n`);
+
+  if (allDocs.length <= keepCount) {
+    console.log(`Only ${allDocs.length} document(s) exist. Nothing to delete.`);
+    return;
+  }
+
+  // Split into keep and delete
+  const toKeep = allDocs.slice(0, keepCount);
+  const toDelete = allDocs.slice(keepCount);
+
+  console.log('Documents to KEEP:');
+  toKeep.forEach((doc, i) => {
+    console.log(` ${i + 1}. ${doc.title} (${doc.id}) - ${formatTimestamp(doc.created_at)}`);
+  });
+
+  console.log(`\nDocuments to DELETE (${toDelete.length}):`);
+  toDelete.forEach((doc, i) => {
+    console.log(` ${i + 1}. ${doc.title} (${doc.id}) - ${formatTimestamp(doc.created_at)}`);
+  });
+
+  console.log(`\n=== Starting deletion ===\n`);
+
+  const docIdsToDelete = toDelete.map(d => d.id);
+
+  // Delete from Meilisearch index
+  console.log('Cleaning Meilisearch index...');
+  let searchFailed = 0;
+  try {
+    const index = await searchClient.getIndex(INDEX_NAME);
+
+    for (const doc of toDelete) {
+      // Per-document try/catch so one failure does not skip the remaining documents
+      try {
+        // Delete all pages and images for this document
+        const filter = `docId = "${doc.id}"`;
+        await index.deleteDocuments({ filter });
+        console.log(` Deleted search entries for: ${doc.title}`);
+      } catch (err) {
+        searchFailed++;
+        console.warn(` Warning: Meilisearch cleanup failed for ${doc.title} (${doc.id}):`, err.message);
+      }
+    }
+  } catch (err) {
+    // The index itself could not be obtained, so nothing was cleaned
+    searchFailed = toDelete.length;
+    console.warn('Warning: Meilisearch cleanup failed:', err.message);
+  }
+
+  // Delete from database (CASCADE will handle document_pages, ocr_jobs)
+  console.log('\nDeleting from database...');
+  const deleteStmt = db.prepare(`DELETE FROM documents WHERE id = ?`);
+  const deleteMany = db.transaction((ids) => {
+    for (const id of ids) {
+      deleteStmt.run(id);
+    }
+  });
+
+  deleteMany(docIdsToDelete);
+  console.log(` Deleted ${docIdsToDelete.length} documents from database`);
+
+  // Delete from filesystem
+  console.log('\nDeleting files from filesystem...');
+  let filesDeleted = 0;
+  let filesFailed = 0;
+
+  for (const doc of toDelete) {
+    try {
+      // Delete the entire document folder (includes PDF and images)
+      const docFolder = resolveDocFolder(doc.id);
+
+      if (docFolder === null) {
+        // Never hand rm() a path that could be UPLOADS_DIR itself or lie outside it
+        console.error(` Refusing to delete unsafe folder path for id: ${JSON.stringify(doc.id)}`);
+        filesFailed++;
+      } else if (existsSync(docFolder)) {
+        await rm(docFolder, { recursive: true, force: true });
+        console.log(` Deleted folder: ${doc.id}/`);
+        filesDeleted++;
+      } else {
+        console.log(` Folder not found: ${doc.id}/`);
+      }
+    } catch (err) {
+      console.error(` Failed to delete folder ${doc.id}:`, err.message);
+      filesFailed++;
+    }
+  }
+
+  console.log('\n=== Cleanup Summary ===');
+  console.log(`Documents kept: ${toKeep.length}`);
+  console.log(`Documents removed from database: ${docIdsToDelete.length}`);
+  console.log(`Search cleanups failed: ${searchFailed}`);
+  console.log(`Folders deleted from filesystem: ${filesDeleted}`);
+  console.log(`Folders failed to delete: ${filesFailed}`);
+  console.log('\nCleanup complete!');
+}
+
+/**
+ * Script entry point: validate the CLI argument, then run the cleanup.
+ *
+ * @returns {Promise<void>}
+ */
+async function main() {
+  await keepLastN(parseKeepCount(process.argv[2]));
+}
+
+// Run cleanup
+main()
+  .then(() => process.exit(0))
+  .catch(err => {
+    console.error('Cleanup failed:', err);
+    process.exit(1);
+  });