navidocs/server/routes/toc.js
ggq-admin fb88b291de feat: Add interactive Table of Contents navigation with i18n support
Implements complete TOC feature for document navigation with bilingual support.

## TOC Detection & Extraction
- Pattern-based TOC detection with 3 regex patterns
- Heuristic validation (30%+ match ratio, 5+ entries, sequential pages)
- Hierarchical section key parsing (e.g., "4.1.2" → level 3, parent "4.1")
- Database schema with parent-child relationships
- Automatic extraction during OCR post-processing
- Server-side LRU caching (200 entries, 30min TTL)

## UI Components
- TocSidebar: Collapsible sidebar (320px) with auto-open on TOC presence
- TocEntry: Recursive component for hierarchical rendering
- Flex layout: Sidebar + PDF viewer side-by-side
- Active page highlighting with real-time sync
- localStorage persistence for sidebar state

## Navigation Features
- Click TOC entry → PDF jumps to page
- Deep link support: URL hash format #p=12
- Page change events: navidocs:pagechange custom event
- URL hash updates on all navigation (next/prev/goTo/TOC)
- Hash change listener for external navigation
- Page clamping and validation

## Search Integration
- "Jump to section" button in search results
- Shown only when the result has a section field
- Navigates to document with page number and hash

## Accessibility
- ARIA attributes: role, aria-label, aria-expanded, aria-current
- Keyboard navigation: Enter/Space on entries, Tab focus
- Screen reader support with aria-live regions
- Semantic HTML with proper list/listitem roles

## Internationalization (i18n)
- Vue I18n integration with vue-i18n package
- English and French translations
- 8 TOC-specific translation keys
- Language switcher component in document viewer
- Locale persistence in localStorage

## Error Handling
- Specific error messages for each failure case
- Validation before processing (doc exists, has pages, has OCR)
- Non-blocking TOC extraction (doesn't fail OCR jobs)
- Detailed error returns: {success, error, entriesCount, pages}

## API Endpoints
- GET /api/documents/:id/toc?format=flat|tree
- POST /api/documents/:id/toc/extract
- Cache invalidation on re-extraction

## Testing
- Smoke test script: 9 comprehensive tests
- E2E testing guide with 5 manual scenarios
- Tests cover: API, caching, validation, navigation, search

## Database
- Migration 002: document_toc table
- Fields: id, document_id, title, section_key, page_start, level, parent_id, order_index
- Foreign keys with CASCADE delete

## Files Changed
- New: TocSidebar.vue, TocEntry.vue, LanguageSwitcher.vue
- New: toc-extractor.js, toc.js routes, i18n setup
- Modified: DocumentView.vue (sidebar, deep links, events)
- Modified: SearchView.vue (Jump to section button)
- Modified: ocr-worker.js (TOC post-processing)
- New: toc-smoke-test.sh, TOC_E2E_TEST.md

Generated with Claude Code (https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-20 13:22:45 +02:00

97 lines
2.6 KiB
JavaScript

/**
* TOC Route - Table of Contents API
* GET /api/documents/:documentId/toc - Get TOC for document
* POST /api/documents/:documentId/toc/extract - Trigger TOC extraction
*/
import express from 'express';
import { LRUCache } from 'lru-cache';
import { getDocumentToc, buildTocTree, extractTocFromDocument } from '../services/toc-extractor.js';
const router = express.Router();

// Sizing for the in-memory TOC cache shared by the routes in this module.
const TOC_CACHE_MAX_ENTRIES = 200;
const TOC_CACHE_TTL_MS = 30 * 60 * 1000; // 30 minutes, in milliseconds

// LRU cache for TOC results; keys are built by the route handlers below.
const tocCache = new LRUCache({
  max: TOC_CACHE_MAX_ENTRIES,
  ttl: TOC_CACHE_TTL_MS
});
/**
 * GET /api/documents/:documentId/toc
 * Get Table of Contents for a document
 *
 * @param {string} documentId - Document identifier taken from the route path
 * @query {string} format - "tree" for the nested form; anything else
 *   (including a missing or repeated parameter) is served as "flat"
 * @returns {Object} { entries: Array, format: string, count: number }
 *   `count` is always the number of flat entries, also in tree mode.
 *   On failure responds with HTTP 500 and { error, message }.
 */
router.get('/documents/:documentId/toc', async (req, res) => {
  try {
    const { documentId } = req.params;

    // Collapse the client-supplied value to the two supported formats BEFORE
    // it becomes part of the cache key. Using the raw query value would let
    // arbitrary strings create extra cache entries that the extract route
    // never invalidates (it only deletes the ":flat" and ":tree" keys).
    const format = req.query.format === 'tree' ? 'tree' : 'flat';
    const cacheKey = `toc:${documentId}:${format}`;

    // The cache stores the flat entry list for both formats; the tree is
    // rebuilt from it on every request.
    let entries = tocCache.get(cacheKey);
    if (!entries) {
      entries = getDocumentToc(documentId);
      tocCache.set(cacheKey, entries);
    }

    if (format === 'tree') {
      const tree = buildTocTree(entries);
      return res.json({ entries: tree, format: 'tree', count: entries.length });
    }

    return res.json({ entries, format: 'flat', count: entries.length });
  } catch (error) {
    console.error('TOC fetch error:', error);
    return res.status(500).json({
      error: 'Failed to fetch TOC',
      message: error.message
    });
  }
});
/**
 * POST /api/documents/:documentId/toc/extract
 * Trigger TOC extraction for a document
 *
 * @param {string} documentId - Document identifier taken from the route path
 * @returns {Object} On success:
 *   { success: true, entriesCount: number, tocPages: *, message: string }
 *   where `tocPages` is the extractor's `pages` value passed through.
 *   When the extractor reports failure: HTTP 400 with { error, message }.
 *   When the extractor throws: HTTP 500 with { error, message }.
 */
router.post('/documents/:documentId/toc/extract', async (req, res) => {
  try {
    const { documentId } = req.params;

    let result;
    try {
      result = await extractTocFromDocument(documentId);
    } finally {
      // Drop cached TOC data whatever the outcome: a failed or throwing
      // extraction may still have changed the stored entries, and dropping a
      // cache entry only costs one re-read on the next GET.
      tocCache.delete(`toc:${documentId}:flat`);
      tocCache.delete(`toc:${documentId}:tree`);
    }

    if (!result.success) {
      return res.status(400).json({
        error: 'TOC extraction failed',
        message: result.error || result.message
      });
    }

    // Guard against a missing `pages` list so a successful extraction is not
    // turned into a 500 while building the human-readable message.
    const pageCount = Array.isArray(result.pages) ? result.pages.length : 0;

    return res.json({
      success: true,
      entriesCount: result.entriesCount,
      tocPages: result.pages,
      message: result.entriesCount > 0
        ? `Extracted ${result.entriesCount} TOC entries from ${pageCount} page(s)`
        : 'No TOC detected in document'
    });
  } catch (error) {
    console.error('TOC extraction error:', error);
    return res.status(500).json({
      error: 'TOC extraction failed',
      message: error.message
    });
  }
});
export default router;