diff --git a/client/package.json b/client/package.json
index e201aa7..009c534 100644
--- a/client/package.json
+++ b/client/package.json
@@ -6,21 +6,24 @@
"scripts": {
"dev": "vite",
"build": "vite build",
- "preview": "vite preview"
+ "preview": "vite preview",
+ "i18n:lint": "node scripts/i18n-keys-lint.js",
+ "test": "npm run i18n:lint"
},
"dependencies": {
- "vue": "^3.5.0",
- "vue-router": "^4.4.0",
- "pinia": "^2.2.0",
+ "meilisearch": "^0.41.0",
"pdfjs-dist": "^4.0.0",
- "meilisearch": "^0.41.0"
+ "pinia": "^2.2.0",
+ "vue": "^3.5.0",
+ "vue-i18n": "^9.14.5",
+ "vue-router": "^4.4.0"
},
"devDependencies": {
"@vitejs/plugin-vue": "^5.0.0",
- "vite": "^5.0.0",
- "tailwindcss": "^3.4.0",
"autoprefixer": "^10.4.0",
+ "playwright": "^1.40.0",
"postcss": "^8.4.0",
- "playwright": "^1.40.0"
+ "tailwindcss": "^3.4.0",
+ "vite": "^5.0.0"
}
}
diff --git a/client/src/components/LanguageSwitcher.vue b/client/src/components/LanguageSwitcher.vue
new file mode 100644
index 0000000..7651f42
--- /dev/null
+++ b/client/src/components/LanguageSwitcher.vue
@@ -0,0 +1,170 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/client/src/components/TocEntry.vue b/client/src/components/TocEntry.vue
new file mode 100644
index 0000000..45a82b3
--- /dev/null
+++ b/client/src/components/TocEntry.vue
@@ -0,0 +1,218 @@
+
+
+
+
+
+
+
+
+
+
+ {{ entry.section_key }}
+
+
+
+ {{ entry.title }}
+
+
+ {{ entry.page_start }}
+
+
+
+
+
+
+
+
+
+
+
diff --git a/client/src/components/TocSidebar.vue b/client/src/components/TocSidebar.vue
new file mode 100644
index 0000000..8573398
--- /dev/null
+++ b/client/src/components/TocSidebar.vue
@@ -0,0 +1,306 @@
+
+
+
+
+
+
+
diff --git a/client/src/i18n/index.js b/client/src/i18n/index.js
new file mode 100644
index 0000000..18232ae
--- /dev/null
+++ b/client/src/i18n/index.js
@@ -0,0 +1,121 @@
+/**
+ * Vue I18n Configuration
+ * Internationalization setup for NaviDocs
+ * Supports EN/FR with browser language detection
+ */
+
+import { createI18n } from 'vue-i18n'
+import en from './locales/en.json'
+import fr from './locales/fr.json'
+
+// Detect browser language
+function getBrowserLocale() {
+ const navigatorLocale =
+ navigator.languages !== undefined
+ ? navigator.languages[0]
+ : navigator.language
+
+ if (!navigatorLocale) {
+ return 'en'
+ }
+
+ // Extract language code (en-US -> en, fr-FR -> fr)
+ const languageCode = navigatorLocale.trim().split(/[-_]/)[0]
+
+ // Check if we support this language
+ const supportedLocales = ['en', 'fr']
+ return supportedLocales.includes(languageCode) ? languageCode : 'en'
+}
+
+// Get stored locale or browser locale
+function getStartingLocale() {
+ const storedLocale = localStorage.getItem('navidocs-locale')
+ if (storedLocale) {
+ return storedLocale
+ }
+
+ return getBrowserLocale()
+}
+
+const i18n = createI18n({
+ legacy: false, // Use Composition API mode
+ locale: getStartingLocale(),
+ fallbackLocale: 'en',
+ messages: {
+ en,
+ fr
+ },
+ // Development guards: warn about missing translations
+ missing: (locale, key) => {
+ console.error(`[i18n] Missing translation: ${locale}:${key}`)
+ },
+ missingWarn: import.meta.env.DEV, // Only warn in development
+ fallbackWarn: import.meta.env.DEV,
+ // Enable number and date formatting
+ datetimeFormats: {
+ en: {
+ short: {
+ year: 'numeric',
+ month: 'short',
+ day: 'numeric'
+ },
+ long: {
+ year: 'numeric',
+ month: 'long',
+ day: 'numeric',
+ hour: 'numeric',
+ minute: 'numeric'
+ }
+ },
+ fr: {
+ short: {
+ year: 'numeric',
+ month: 'short',
+ day: 'numeric'
+ },
+ long: {
+ year: 'numeric',
+ month: 'long',
+ day: 'numeric',
+ hour: 'numeric',
+ minute: 'numeric'
+ }
+ }
+ },
+ numberFormats: {
+ en: {
+ currency: {
+ style: 'currency',
+ currency: 'USD'
+ },
+ decimal: {
+ style: 'decimal',
+ minimumFractionDigits: 2,
+ maximumFractionDigits: 2
+ }
+ },
+ fr: {
+ currency: {
+ style: 'currency',
+ currency: 'EUR'
+ },
+ decimal: {
+ style: 'decimal',
+ minimumFractionDigits: 2,
+ maximumFractionDigits: 2
+ }
+ }
+ }
+})
+
+// Helper function to switch locale
+export function setLocale(locale) {
+ i18n.global.locale.value = locale
+ localStorage.setItem('navidocs-locale', locale)
+ document.querySelector('html').setAttribute('lang', locale)
+}
+
+// Reflect the startup locale on the root element's lang attribute
+document.documentElement.setAttribute('lang', getStartingLocale())
+
+export default i18n
diff --git a/client/src/i18n/locales/en.json b/client/src/i18n/locales/en.json
new file mode 100644
index 0000000..f255648
--- /dev/null
+++ b/client/src/i18n/locales/en.json
@@ -0,0 +1,160 @@
+{
+ "app": {
+ "title": "NaviDocs",
+ "tagline": "Marine Manual Intelligence"
+ },
+ "nav": {
+ "home": "Home",
+ "search": "Search",
+ "documents": "Documents",
+ "upload": "Upload",
+ "stats": "Statistics"
+ },
+ "home": {
+ "welcome": "Welcome to NaviDocs",
+ "searchPlaceholder": "Search your boat manuals and documentation...",
+ "recentDocuments": "Recent Documents",
+ "quickActions": "Quick Actions",
+ "uploadDocument": "Upload Document",
+ "viewAll": "View All Documents"
+ },
+ "search": {
+ "title": "Search Results",
+ "placeholder": "Search manuals, specs, and documentation...",
+ "searching": "Searching...",
+ "noResults": "No matches found",
+ "noResultsHint": "Try different search terms or check the spelling",
+ "resultsCount": "{count} result | {count} results",
+ "page": "Page",
+ "section": "Section",
+ "expand": "Show context",
+ "collapse": "Hide context",
+ "viewDocument": "View document",
+ "prevPage": "Previous page",
+ "currentPage": "Current page",
+ "nextPage": "Next page",
+ "noDiagram": "No diagram"
+ },
+ "document": {
+ "title": "Document Viewer",
+ "back": "Back",
+ "page": "Page",
+ "of": "of",
+ "images": "image | images",
+ "previous": "Previous",
+ "next": "Next",
+ "goToPage": "Go",
+ "loading": "Loading document...",
+ "rendering": "Rendering page...",
+ "error": "Unable to render document",
+ "retry": "Retry",
+ "findBar": {
+ "noMatches": "No matches",
+ "matchCount": "{current} / {total}",
+ "previousMatch": "Previous match",
+ "nextMatch": "Next match",
+ "jumpTo": "Jump to",
+ "match": "Match",
+ "moreMatches": "+ {count} more matches"
+ }
+ },
+ "upload": {
+ "title": "Upload Documents",
+ "dropZone": "Drop PDF files here or click to browse",
+ "browseFiles": "Browse Files",
+ "uploading": "Uploading...",
+ "processing": "Processing...",
+ "success": "Upload successful",
+ "error": "Upload failed",
+ "maxSize": "Maximum file size: {size}MB",
+ "supportedFormats": "Supported formats: PDF",
+ "documentInfo": "Document Information",
+ "documentTitle": "Document Title",
+ "documentType": "Document Type",
+ "boatInfo": "Boat Information",
+ "boatName": "Boat Name",
+ "boatMake": "Manufacturer",
+ "boatModel": "Model",
+ "boatYear": "Year",
+ "submit": "Upload",
+ "cancel": "Cancel",
+ "types": {
+ "manual": "Owner's Manual",
+ "service": "Service Manual",
+ "component": "Component Manual",
+ "wiring": "Wiring Diagram",
+ "parts": "Parts List",
+ "other": "Other Documentation"
+ }
+ },
+ "stats": {
+ "title": "Statistics",
+ "overview": "Overview",
+ "totalDocuments": "Total Documents",
+ "totalPages": "Total Pages",
+ "storageUsed": "Storage Used",
+ "recentActivity": "Recent Activity",
+ "documentsByType": "Documents by Type",
+ "pagesByBoat": "Pages by Boat",
+ "searchActivity": "Search Activity",
+ "topSearchTerms": "Top Search Terms"
+ },
+ "common": {
+ "loading": "Loading...",
+ "error": "Error",
+ "success": "Success",
+ "save": "Save",
+ "cancel": "Cancel",
+ "delete": "Delete",
+ "edit": "Edit",
+ "close": "Close",
+ "confirm": "Confirm",
+ "yes": "Yes",
+ "no": "No",
+ "search": "Search",
+ "filter": "Filter",
+ "sort": "Sort",
+ "actions": "Actions",
+ "viewDetails": "View Details",
+ "download": "Download",
+ "share": "Share",
+ "print": "Print"
+ },
+ "marine": {
+ "systems": {
+ "electrical": "Electrical System",
+ "plumbing": "Plumbing & Water",
+ "navigation": "Navigation",
+ "propulsion": "Propulsion",
+ "hvac": "Climate Control",
+ "safety": "Safety Equipment",
+ "galley": "Galley",
+ "head": "Head",
+ "deck": "Deck Equipment",
+ "rigging": "Rigging"
+ },
+ "categories": {
+ "maintenance": "Maintenance",
+ "troubleshooting": "Troubleshooting",
+ "installation": "Installation",
+ "operation": "Operation",
+ "safety": "Safety Procedures",
+ "specifications": "Specifications"
+ }
+ },
+ "toc": {
+ "tableOfContents": "Table of Contents",
+ "loading": "Loading index...",
+ "noTocFound": "No table of contents found in this document",
+ "extract": "Extract TOC",
+ "entries": "entries",
+ "expand": "Expand index",
+ "collapse": "Collapse index",
+ "jumpToSection": "Jump to section"
+ },
+ "language": {
+ "select": "Select Language",
+ "en": "English",
+ "fr": "Français"
+ }
+}
diff --git a/client/src/i18n/locales/fr.json b/client/src/i18n/locales/fr.json
new file mode 100644
index 0000000..6b571b0
--- /dev/null
+++ b/client/src/i18n/locales/fr.json
@@ -0,0 +1,160 @@
+{
+ "app": {
+ "title": "NaviDocs",
+ "tagline": "Intelligence Nautique"
+ },
+ "nav": {
+ "home": "Accueil",
+ "search": "Recherche",
+ "documents": "Documents",
+ "upload": "Téléverser",
+ "stats": "Statistiques"
+ },
+ "home": {
+ "welcome": "Bienvenue sur NaviDocs",
+ "searchPlaceholder": "Rechercher dans vos manuels et documentation nautique...",
+ "recentDocuments": "Documents récents",
+ "quickActions": "Actions rapides",
+ "uploadDocument": "Téléverser un document",
+ "viewAll": "Voir tous les documents"
+ },
+ "search": {
+ "title": "Résultats de recherche",
+ "placeholder": "Rechercher dans les manuels, spécifications et documentation...",
+ "searching": "Recherche en cours...",
+ "noResults": "Aucun résultat",
+ "noResultsHint": "Essayez d'autres termes ou vérifiez l'orthographe",
+ "resultsCount": "{count} résultat | {count} résultats",
+ "page": "Page",
+ "section": "Section",
+ "expand": "Afficher le contexte",
+ "collapse": "Masquer le contexte",
+ "viewDocument": "Voir le document",
+ "prevPage": "Page précédente",
+ "currentPage": "Page actuelle",
+ "nextPage": "Page suivante",
+ "noDiagram": "Pas de schéma"
+ },
+ "document": {
+ "title": "Visionneuse de documents",
+ "back": "Retour",
+ "page": "Page",
+ "of": "sur",
+ "images": "image | images",
+ "previous": "Précédent",
+ "next": "Suivant",
+ "goToPage": "Aller",
+ "loading": "Chargement du document...",
+ "rendering": "Affichage de la page...",
+ "error": "Impossible d'afficher le document",
+ "retry": "Réessayer",
+ "findBar": {
+ "noMatches": "Aucune correspondance",
+ "matchCount": "{current} / {total}",
+ "previousMatch": "Correspondance précédente",
+ "nextMatch": "Correspondance suivante",
+ "jumpTo": "Aller à",
+ "match": "Correspondance",
+ "moreMatches": "+ {count} correspondances supplémentaires"
+ }
+ },
+ "upload": {
+ "title": "Téléverser des documents",
+ "dropZone": "Déposez les fichiers PDF ici ou cliquez pour parcourir",
+ "browseFiles": "Parcourir les fichiers",
+ "uploading": "Téléversement en cours...",
+ "processing": "Traitement en cours...",
+ "success": "Téléversement réussi",
+ "error": "Échec du téléversement",
+ "maxSize": "Taille maximale du fichier : {size}Mo",
+ "supportedFormats": "Formats pris en charge : PDF",
+ "documentInfo": "Informations du document",
+ "documentTitle": "Titre du document",
+ "documentType": "Type de document",
+ "boatInfo": "Informations du bateau",
+ "boatName": "Nom du bateau",
+ "boatMake": "Constructeur",
+ "boatModel": "Modèle",
+ "boatYear": "Année",
+ "submit": "Téléverser",
+ "cancel": "Annuler",
+ "types": {
+ "manual": "Manuel du propriétaire",
+ "service": "Manuel d'entretien",
+ "component": "Manuel de composant",
+ "wiring": "Schéma électrique",
+ "parts": "Liste de pièces",
+ "other": "Autre documentation"
+ }
+ },
+ "stats": {
+ "title": "Statistiques",
+ "overview": "Aperçu",
+ "totalDocuments": "Documents totaux",
+ "totalPages": "Pages totales",
+ "storageUsed": "Espace utilisé",
+ "recentActivity": "Activité récente",
+ "documentsByType": "Documents par type",
+ "pagesByBoat": "Pages par bateau",
+ "searchActivity": "Activité de recherche",
+ "topSearchTerms": "Termes les plus recherchés"
+ },
+ "common": {
+ "loading": "Chargement...",
+ "error": "Erreur",
+ "success": "Succès",
+ "save": "Enregistrer",
+ "cancel": "Annuler",
+ "delete": "Supprimer",
+ "edit": "Modifier",
+ "close": "Fermer",
+ "confirm": "Confirmer",
+ "yes": "Oui",
+ "no": "Non",
+ "search": "Rechercher",
+ "filter": "Filtrer",
+ "sort": "Trier",
+ "actions": "Actions",
+ "viewDetails": "Voir les détails",
+ "download": "Télécharger",
+ "share": "Partager",
+ "print": "Imprimer"
+ },
+ "marine": {
+ "systems": {
+ "electrical": "Système électrique",
+ "plumbing": "Plomberie & eau",
+ "navigation": "Navigation",
+ "propulsion": "Propulsion",
+ "hvac": "Climatisation",
+ "safety": "Équipement de sécurité",
+ "galley": "Cuisine",
+ "head": "Toilettes",
+ "deck": "Équipement de pont",
+ "rigging": "Gréement"
+ },
+ "categories": {
+ "maintenance": "Entretien",
+ "troubleshooting": "Dépannage",
+ "installation": "Installation",
+ "operation": "Fonctionnement",
+ "safety": "Procédures de sécurité",
+ "specifications": "Spécifications"
+ }
+ },
+ "toc": {
+ "tableOfContents": "Table des matières",
+ "loading": "Chargement de l'index...",
+ "noTocFound": "Aucune table des matières trouvée dans ce document",
+ "extract": "Extraire la table",
+ "entries": "entrées",
+ "expand": "Développer l'index",
+ "collapse": "Réduire l'index",
+ "jumpToSection": "Aller à la section"
+ },
+ "language": {
+ "select": "Choisir la langue",
+ "en": "English",
+ "fr": "Français"
+ }
+}
diff --git a/client/src/main.js b/client/src/main.js
index bad15fe..e217226 100644
--- a/client/src/main.js
+++ b/client/src/main.js
@@ -5,6 +5,7 @@
import { createApp } from 'vue'
import { createPinia } from 'pinia'
import router from './router'
+import i18n from './i18n'
import App from './App.vue'
import './assets/main.css'
@@ -12,6 +13,7 @@ const app = createApp(App)
app.use(createPinia())
app.use(router)
+app.use(i18n)
app.mount('#app')
diff --git a/client/src/views/DocumentView.vue b/client/src/views/DocumentView.vue
index a7c190a..9a29e0a 100644
--- a/client/src/views/DocumentView.vue
+++ b/client/src/views/DocumentView.vue
@@ -8,7 +8,7 @@
- Back
+ {{ $t('document.back') }}
@@ -17,10 +17,83 @@
- Page {{ currentPage }} / {{ totalPages }}
+ {{ $t('document.page') }} {{ currentPage }} {{ $t('document.of') }} {{ totalPages }}
- ({{ pageImages.length }} {{ pageImages.length === 1 ? 'image' : 'images' }})
+ ({{ pageImages.length }} {{ $t('document.images', pageImages.length) }})
+
+
+
+
+
+
+
+
+
+
+
{{ searchQuery }}
+
+
+
+
+ {{ totalHits === 0 ? $t('document.findBar.noMatches') : $t('document.findBar.matchCount', { current: currentHitIndex + 1, total: totalHits }) }}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ + {{ hitList.length - 5 }} more matches
+
+
@@ -34,7 +107,7 @@
- Previous
+ {{ $t('document.previous') }}
@@ -48,7 +121,7 @@
class="w-16 px-3 py-2 bg-white/10 text-white border border-white/20 rounded-lg text-center focus:outline-none focus:ring-2 focus:ring-pink-400 focus:border-pink-400"
/>
@@ -57,7 +130,7 @@
:disabled="currentPage >= totalPages || isRendering"
class="px-4 py-2 bg-white/10 hover:bg-white/15 disabled:bg-white/5 disabled:text-white/30 text-white rounded-lg transition-colors flex items-center gap-2 border border-white/10"
>
- Next
+ {{ $t('document.next') }}
@@ -66,9 +139,19 @@
-
-
-
+
+
+
+
+
+
+
+
@@ -143,6 +227,8 @@ import * as pdfjsLib from 'pdfjs-dist'
import 'pdfjs-dist/web/pdf_viewer.css'
import ImageOverlay from '../components/ImageOverlay.vue'
import FigureZoom from '../components/FigureZoom.vue'
+import LanguageSwitcher from '../components/LanguageSwitcher.vue'
+import TocSidebar from '../components/TocSidebar.vue'
import { useDocumentImages } from '../composables/useDocumentImages'
// Configure PDF.js worker - use local worker file instead of CDN
@@ -168,6 +254,12 @@ const canvasContainer = ref(null)
const textLayer = ref(null)
const isRendering = ref(false)
+// Find bar state
+const currentHitIndex = ref(0)
+const totalHits = ref(0)
+const hitList = ref([])
+const jumpListOpen = ref(false)
+
// PDF rendering scale
const pdfScale = ref(1.5)
@@ -218,11 +310,17 @@ async function loadDocument() {
}
function highlightSearchTerms() {
- if (!textLayer.value || !searchQuery.value) return
+ if (!textLayer.value || !searchQuery.value) {
+ totalHits.value = 0
+ hitList.value = []
+ currentHitIndex.value = 0
+ return
+ }
const spans = textLayer.value.querySelectorAll('span')
const query = searchQuery.value.toLowerCase().trim()
- let firstMatch = null
+ const hits = []
+ let hitIndex = 0
spans.forEach(span => {
const text = span.textContent
@@ -230,28 +328,86 @@ function highlightSearchTerms() {
const lowerText = text.toLowerCase()
if (lowerText.includes(query)) {
- // Create a highlighted version
+ // Create a highlighted version with data attributes
const regex = new RegExp(`(${query.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')})`, 'gi')
- const highlightedText = text.replace(regex, '
$1')
+ const highlightedText = text.replace(regex, (match) => {
+ const idx = hitIndex
+ hitIndex++
+ return `
${match}`
+ })
- // Wrap in a container to preserve PDF.js positioning
span.innerHTML = highlightedText
- // Track first match for scrolling
- if (!firstMatch) {
- firstMatch = span
- }
+ // Collect hit information for jump list
+ const snippet = text.length > 100 ? text.substring(0, 100) + '...' : text
+ const marks = span.querySelectorAll('mark')
+ marks.forEach((mark) => {
+ hits.push({
+ element: mark,
+ snippet: snippet,
+ page: currentPage.value,
+ index: parseInt(mark.getAttribute('data-hit-index'))
+ })
+ })
}
})
+ totalHits.value = hits.length
+ hitList.value = hits
+ currentHitIndex.value = 0
+
// Scroll to first match
- if (firstMatch) {
- setTimeout(() => {
- firstMatch.scrollIntoView({ behavior: 'smooth', block: 'center' })
- }, 100)
+ if (hits.length > 0) {
+ scrollToHit(0)
}
}
+function scrollToHit(index) {
+ if (index < 0 || index >= hitList.value.length) return
+
+ const hit = hitList.value[index]
+ if (!hit || !hit.element) return
+
+ // Remove active class from all marks
+ hitList.value.forEach(h => {
+ if (h.element) {
+ h.element.classList.remove('search-highlight-active')
+ }
+ })
+
+ // Add active class to current hit
+ hit.element.classList.add('search-highlight-active')
+
+ // Scroll to current hit
+ setTimeout(() => {
+ hit.element.scrollIntoView({ behavior: 'smooth', block: 'center' })
+ }, 100)
+}
+
+function nextHit() {
+ if (totalHits.value === 0) return
+
+ currentHitIndex.value = (currentHitIndex.value + 1) % totalHits.value
+ scrollToHit(currentHitIndex.value)
+}
+
+function prevHit() {
+ if (totalHits.value === 0) return
+
+ currentHitIndex.value = currentHitIndex.value === 0
+ ? totalHits.value - 1
+ : currentHitIndex.value - 1
+ scrollToHit(currentHitIndex.value)
+}
+
+function jumpToHit(index) {
+ if (index < 0 || index >= hitList.value.length) return
+
+ currentHitIndex.value = index
+ scrollToHit(index)
+ jumpListOpen.value = false
+}
+
async function renderPage(pageNum) {
if (!pdfDoc || componentIsUnmounting) return
@@ -310,12 +466,14 @@ async function renderPage(pageNum) {
try {
const textContent = await page.getTextContent()
- pdfjsLib.renderTextLayer({
+
+ // PDF.js 4.x uses TextLayer class instead of renderTextLayer function
+ const textLayerRender = new pdfjsLib.TextLayer({
textContentSource: textContent,
container: textLayer.value,
- viewport: viewport,
- textDivs: []
+ viewport: viewport
})
+ await textLayerRender.render()
// Highlight search terms if query exists
if (searchQuery.value) {
@@ -364,6 +522,12 @@ async function nextPage() {
currentPage.value += 1
pageInput.value = currentPage.value
await renderPage(currentPage.value)
+
+ // Update URL hash and dispatch event
+ window.location.hash = `#p=${currentPage.value}`
+ window.dispatchEvent(new CustomEvent('navidocs:pagechange', {
+ detail: { page: currentPage.value }
+ }))
}
async function previousPage() {
@@ -371,6 +535,12 @@ async function previousPage() {
currentPage.value -= 1
pageInput.value = currentPage.value
await renderPage(currentPage.value)
+
+ // Update URL hash and dispatch event
+ window.location.hash = `#p=${currentPage.value}`
+ window.dispatchEvent(new CustomEvent('navidocs:pagechange', {
+ detail: { page: currentPage.value }
+ }))
}
async function goToPage() {
@@ -383,11 +553,26 @@ async function goToPage() {
if (page >= 1 && page <= totalPages.value) {
currentPage.value = page
await renderPage(currentPage.value)
+
+ // Update URL hash for deep linking
+ window.location.hash = `#p=${currentPage.value}`
+
+ // Dispatch custom event for page change
+ window.dispatchEvent(new CustomEvent('navidocs:pagechange', {
+ detail: { page: currentPage.value }
+ }))
} else {
pageInput.value = currentPage.value
}
}
+// Handle TOC navigation jumps
+function handleTocJump(pageNumber) {
+ const clamped = Math.max(1, Math.min(pageNumber, totalPages.value))
+ pageInput.value = clamped
+ goToPage()
+}
+
watch(
() => route.query.page,
async (newPage) => {
@@ -467,6 +652,35 @@ async function resetDocumentState() {
onMounted(() => {
loadDocument()
+
+ // Handle deep links (#p=12)
+ const hash = window.location.hash
+ if (hash.startsWith('#p=')) {
+ const pageNum = parseInt(hash.substring(3), 10)
+ if (!Number.isNaN(pageNum) && pageNum >= 1) {
+ currentPage.value = pageNum
+ pageInput.value = pageNum
+ }
+ }
+
+ // Listen for hash changes
+ const handleHashChange = () => {
+ const newHash = window.location.hash
+ if (newHash.startsWith('#p=')) {
+ const pageNum = parseInt(newHash.substring(3), 10)
+ if (!Number.isNaN(pageNum) && pageNum >= 1 && pageNum <= totalPages.value) {
+ pageInput.value = pageNum
+ goToPage()
+ }
+ }
+ }
+
+ window.addEventListener('hashchange', handleHashChange)
+
+ // Clean up listener
+ onBeforeUnmount(() => {
+ window.removeEventListener('hashchange', handleHashChange)
+ })
})
onBeforeUnmount(() => {
@@ -527,15 +741,33 @@ onBeforeUnmount(() => {
padding: 2px 0;
border-radius: 2px;
font-weight: 600;
- animation: highlight-pulse 1.5s ease-in-out;
+ transition: background-color 0.2s ease;
}
-@keyframes highlight-pulse {
+.search-highlight-active {
+ background-color: rgba(255, 92, 178, 0.8) !important;
+ color: #fff !important;
+ box-shadow: 0 0 0 2px rgba(255, 92, 178, 0.4);
+ animation: active-pulse 1.5s ease-in-out;
+}
+
+@keyframes active-pulse {
0%, 100% {
- background-color: rgba(255, 215, 0, 0.6);
+ background-color: rgba(255, 92, 178, 0.8);
}
50% {
- background-color: rgba(255, 215, 0, 0.9);
+ background-color: rgba(255, 92, 178, 1);
}
}
+
+.viewer-wrapper {
+ display: flex;
+ min-height: calc(100vh - 64px); /* Account for header */
+}
+
+.pdf-pane {
+ flex: 1;
+ min-width: 0; /* Allow flex item to shrink */
+ overflow-x: auto;
+}
diff --git a/client/src/views/SearchView.vue b/client/src/views/SearchView.vue
index 1356df1..4def9ed 100644
--- a/client/src/views/SearchView.vue
+++ b/client/src/views/SearchView.vue
@@ -14,6 +14,7 @@
NaviDocs
+
@@ -29,7 +30,7 @@
@input="performSearch"
type="text"
class="w-full h-12 px-5 pr-14 rounded-xl border-2 border-white/20 bg-white/10 backdrop-blur-lg text-white placeholder-white/50 shadow-lg focus:outline-none focus:border-pink-400 focus:ring-2 focus:ring-pink-400/20 transition-all duration-200"
- placeholder="Search your manuals..."
+ :placeholder="$t('search.placeholder')"
autofocus
/>
@@ -44,7 +45,7 @@
-
{{ results.length }} results
+
{{ $t('search.resultsCount', { count: results.length }) }}
{{ searchTime }}ms
@@ -62,18 +63,28 @@
@@ -126,10 +190,10 @@
-
No results found
-
Try different keywords or check your spelling
+
{{ $t('search.noResults') }}
+
{{ $t('search.noResultsHint') }}
@@ -151,6 +215,7 @@
import { ref, onMounted, watch } from 'vue'
import { useRoute, useRouter } from 'vue-router'
import { useSearch } from '../composables/useSearch'
+import LanguageSwitcher from '../components/LanguageSwitcher.vue'
const route = useRoute()
const router = useRouter()
@@ -158,6 +223,8 @@ const router = useRouter()
const { results, loading, searchTime, search } = useSearch()
const searchQuery = ref(route.query.q || '')
const activePreview = ref(null)
+const expandedId = ref(null)
+const contextCache = ref({})
let previewTimer = null
async function performSearch() {
@@ -213,10 +280,43 @@ function viewDocument(result) {
})
}
+function jumpToSection(result) {
+ router.push(`/document/${result.docId}?page=${result.pageNumber}#p=${result.pageNumber}`)
+}
+
function handleImageError(event) {
event.target.closest('.nv-popover')?.remove()
}
+function shouldShowSectionHeader(result, index) {
+ if (index === 0) return true // Always show for first result
+ const prevResult = results.value[index - 1]
+ return result.sectionKey !== prevResult?.sectionKey
+}
+
+async function toggleExpand(result) {
+ const resultId = result.id
+
+ if (expandedId.value === resultId) {
+ expandedId.value = null
+ return
+ }
+
+ expandedId.value = resultId
+
+ // Fetch context if not cached
+ if (!contextCache.value[resultId]) {
+ try {
+ const response = await fetch(`/api/context?docId=${result.docId}&page=${result.pageNumber}`)
+ if (response.ok) {
+ contextCache.value[resultId] = await response.json()
+ }
+ } catch (error) {
+ console.error('Failed to fetch context:', error)
+ }
+ }
+}
+
// Watch for query changes from URL
watch(() => route.query.q, (newQuery) => {
searchQuery.value = newQuery || ''
@@ -332,6 +432,22 @@ onMounted(() => {
border-color: rgba(255, 230, 102, 0.5);
}
+.nv-chip-text {
+ font-size: 11px;
+ padding: 3px 8px;
+ border-radius: 8px;
+ background: rgba(207, 167, 255, 0.12);
+ color: #cfa7ff;
+ border: 1px solid rgba(207, 167, 255, 0.35);
+ cursor: pointer;
+ transition: all 0.15s ease;
+}
+
+.nv-chip-text:hover {
+ background: rgba(207, 167, 255, 0.2);
+ border-color: rgba(207, 167, 255, 0.5);
+}
+
.nv-link {
color: #cfa7ff;
font-weight: 500;
@@ -364,6 +480,119 @@ onMounted(() => {
height: 48px !important;
}
+/* Section header grouping */
+.nv-section-header {
+ display: flex;
+ align-items: center;
+ gap: 8px;
+ padding: 12px 0 8px 0;
+ margin-top: 16px;
+ font-size: 13px;
+ font-weight: 600;
+ color: #cfa7ff;
+ letter-spacing: 0.02em;
+}
+
+.nv-section-header:first-child {
+ margin-top: 0;
+}
+
+/* Inline expansion panel */
+.nv-expand {
+ margin-top: 12px;
+ padding-top: 12px;
+ border-top: 1px solid rgba(255, 255, 255, 0.1);
+}
+
+.nv-expand-loading {
+ display: flex;
+ align-items: center;
+ gap: 8px;
+ font-size: 12px;
+ color: #9aa0a6;
+ padding: 12px 0;
+}
+
+.spinner {
+ width: 14px;
+ height: 14px;
+ border: 2px solid rgba(207, 167, 255, 0.3);
+ border-top-color: #cfa7ff;
+ border-radius: 50%;
+ animation: spin 0.8s linear infinite;
+}
+
+@keyframes spin {
+ to { transform: rotate(360deg); }
+}
+
+.nv-context-pages {
+ display: flex;
+ gap: 12px;
+ margin-bottom: 12px;
+ overflow-x: auto;
+}
+
+.nv-context-page {
+ flex-shrink: 0;
+ text-align: center;
+}
+
+.nv-context-image {
+ width: 100px;
+ height: 100px;
+ background: rgba(255, 255, 255, 0.05);
+ border: 1px solid rgba(255, 255, 255, 0.1);
+ border-radius: 6px;
+ overflow: hidden;
+ display: flex;
+ align-items: center;
+ justify-content: center;
+}
+
+.nv-context-page.active .nv-context-image {
+ border-color: rgba(207, 167, 255, 0.5);
+ box-shadow: 0 0 0 2px rgba(207, 167, 255, 0.2);
+}
+
+.nv-context-image img {
+ width: 100%;
+ height: 100%;
+ object-fit: cover;
+}
+
+.nv-context-noimage {
+ font-size: 10px;
+ color: #6b6b7a;
+ text-align: center;
+ padding: 8px;
+}
+
+.nv-context-page figcaption {
+ margin-top: 4px;
+ font-size: 10px;
+ color: #9aa0a6;
+}
+
+.nv-context-page.active figcaption {
+ color: #cfa7ff;
+ font-weight: 600;
+}
+
+.nv-expand-text {
+ padding: 8px 12px;
+ background: rgba(255, 255, 255, 0.03);
+ border-radius: 6px;
+ max-height: 200px;
+ overflow-y: auto;
+}
+
+.nv-expand-text .nv-snippet {
+ font-size: 14px;
+ line-height: 1.6;
+ margin: 0;
+}
+
@media (max-width: 768px) {
.nv-doc {
display: none;
diff --git a/server/db/migrations/002_add_document_toc.sql b/server/db/migrations/002_add_document_toc.sql
new file mode 100644
index 0000000..7ba98c7
--- /dev/null
+++ b/server/db/migrations/002_add_document_toc.sql
@@ -0,0 +1,35 @@
+-- Migration: Add document_toc table for interactive table of contents
+-- Date: 2025-10-20
+-- Description: Store extracted TOC entries from PDF documents for navigation
+
+CREATE TABLE IF NOT EXISTS document_toc (
+ id TEXT PRIMARY KEY,
+ document_id TEXT NOT NULL,
+
+ -- TOC entry details
+ title TEXT NOT NULL, -- "Chapter 4 - Plumbing System"
+ section_key TEXT, -- "4" or "4.1.2" for hierarchical entries
+ page_start INTEGER NOT NULL, -- Target page number
+
+ -- Hierarchy support
+ level INTEGER DEFAULT 1, -- 1 for "4", 2 for "4.1", 3 for "4.1.2"
+ parent_id TEXT, -- Reference to parent entry for nesting
+
+ -- Ordering
+ order_index INTEGER NOT NULL, -- Sequential order in TOC
+
+ -- Source tracking
+ toc_page_number INTEGER, -- Which page the TOC entry was found on
+
+ -- Metadata
+ created_at INTEGER NOT NULL,
+
+ FOREIGN KEY (document_id) REFERENCES documents(id) ON DELETE CASCADE,
+ FOREIGN KEY (parent_id) REFERENCES document_toc(id) ON DELETE CASCADE
+);
+
+-- Indexes for performance
+-- NOTE(review): idx_toc_document indexes only document_id, which is also the
+-- leading column of idx_toc_order and idx_toc_section; it may be redundant.
+-- Confirm against the query plans before dropping it.
+CREATE INDEX IF NOT EXISTS idx_toc_document ON document_toc(document_id);
+-- Entries of one document by order_index
+CREATE INDEX IF NOT EXISTS idx_toc_order ON document_toc(document_id, order_index);
+-- Entries by parent_id
+CREATE INDEX IF NOT EXISTS idx_toc_parent ON document_toc(parent_id);
+-- Entries of one document by section_key
+CREATE INDEX IF NOT EXISTS idx_toc_section ON document_toc(document_id, section_key);
diff --git a/server/index.js b/server/index.js
index aa5810b..22f4791 100644
--- a/server/index.js
+++ b/server/index.js
@@ -90,6 +90,8 @@ import searchRoutes from './routes/search.js';
import documentsRoutes from './routes/documents.js';
import imagesRoutes from './routes/images.js';
import statsRoutes from './routes/stats.js';
+import contextRoutes from './routes/context.js';
+import tocRoutes from './routes/toc.js';
// API routes
app.use('/api/upload/quick-ocr', quickOcrRoutes);
@@ -98,6 +100,8 @@ app.use('/api/jobs', jobsRoutes);
app.use('/api/search', searchRoutes);
app.use('/api/documents', documentsRoutes);
app.use('/api/stats', statsRoutes);
+app.use('/api/context', contextRoutes);
+app.use('/api', tocRoutes); // Handles /api/documents/:id/toc paths
app.use('/api', imagesRoutes);
// Error handling
diff --git a/server/package.json b/server/package.json
index c15b22b..4ece237 100644
--- a/server/package.json
+++ b/server/package.json
@@ -29,6 +29,7 @@
"helmet": "^7.0.0",
"ioredis": "^5.0.0",
"jsonwebtoken": "^9.0.0",
+ "lru-cache": "^11.2.2",
"meilisearch": "^0.41.0",
"multer": "^1.4.5-lts.1",
"pdf-img-convert": "^2.0.0",
diff --git a/server/routes/toc.js b/server/routes/toc.js
new file mode 100644
index 0000000..9df66f1
--- /dev/null
+++ b/server/routes/toc.js
@@ -0,0 +1,97 @@
+/**
+ * TOC Route - Table of Contents API
+ * GET /api/documents/:documentId/toc - Get TOC for document
+ * POST /api/documents/:documentId/toc/extract - Trigger TOC extraction
+ */
+
+import express from 'express';
+import { LRUCache } from 'lru-cache';
+import { getDocumentToc, buildTocTree, extractTocFromDocument } from '../services/toc-extractor.js';
+
+const router = express.Router();
+
+// LRU cache for TOC results: holds at most 200 entries, each with a 30-minute TTL
+const tocCache = new LRUCache({
+ max: 200,
+ ttl: 1000 * 60 * 30 // 30 minutes
+});
+
+/**
+ * GET /api/documents/:documentId/toc
+ * Return the Table of Contents for a document, served from the LRU cache when present.
+ *
+ * @param {string} documentId - Document UUID
+ * @query {string} format - "tree" for nested output; any other value means "flat" (default)
+ * @returns {Object} { entries: Array, format: string, count: number }; 500 with { error, message } on failure
+ */
+router.get('/documents/:documentId/toc', async (req, res) => {
+  try {
+    const { documentId } = req.params;
+    // The extract route only evicts the ":flat" and ":tree" keys, so never cache under any other format value
+    const format = req.query.format === 'tree' ? 'tree' : 'flat';
+
+    const cacheKey = `toc:${documentId}:${format}`;
+    let entries = tocCache.get(cacheKey);
+    if (!entries) {
+      entries = getDocumentToc(documentId);
+      tocCache.set(cacheKey, entries);
+    }
+
+    if (format === 'tree') {
+      const tree = buildTocTree(entries);
+      return res.json({ entries: tree, format: 'tree', count: entries.length });
+    }
+
+    res.json({ entries, format: 'flat', count: entries.length });
+  } catch (error) {
+    console.error('TOC fetch error:', error);
+    res.status(500).json({
+      error: 'Failed to fetch TOC',
+      message: error.message
+    });
+  }
+});
+
+/**
+ * POST /api/documents/:documentId/toc/extract
+ * Run TOC extraction for a document; on success, evict its cached TOC entries.
+ *
+ * @param {string} documentId - Document UUID
+ * @returns {Object} { success: boolean, entriesCount: number, tocPages: number[], message: string }
+ */
+router.post('/documents/:documentId/toc/extract', async (req, res) => {
+  try {
+    const { documentId } = req.params;
+    const outcome = await extractTocFromDocument(documentId);
+
+    if (!outcome.success) {
+      const reason = outcome.error || outcome.message;
+      return res.status(400).json({ error: 'TOC extraction failed', message: reason });
+    }
+
+    // Cached results predate this extraction: evict both format variants
+    for (const format of ['flat', 'tree']) {
+      tocCache.delete(`toc:${documentId}:${format}`);
+    }
+
+    const { entriesCount, pages } = outcome;
+    const summary = entriesCount > 0
+      ? `Extracted ${entriesCount} TOC entries from ${pages.length} page(s)`
+      : 'No TOC detected in document';
+
+    res.json({
+      success: true,
+      entriesCount,
+      tocPages: pages,
+      message: summary
+    });
+  } catch (failure) {
+    console.error('TOC extraction error:', failure);
+    res.status(500).json({
+      error: 'TOC extraction failed',
+      message: failure.message
+    });
+  }
+});
+
+export default router;
diff --git a/server/services/section-extractor.js b/server/services/section-extractor.js
new file mode 100644
index 0000000..9877e25
--- /dev/null
+++ b/server/services/section-extractor.js
@@ -0,0 +1,265 @@
+/**
+ * Section Extractor Service
+ *
+ * Extracts section/chapter metadata from PDFs, trying three methods in order:
+ * 1. PDF Outline/Bookmarks (most reliable; currently always falls through)
+ * 2. Table of Contents Parsing (fallback)
+ * 3. Header Detection via Regex (last resort)
+ */
+
+import pdf from 'pdf-parse';
+import fs from 'fs';
+import { promisify } from 'util';
+
+const readFile = promisify(fs.readFile);
+
+/**
+ * Build a lowercase, hyphen-separated key from a section title.
+ * Drops every character other than word characters, whitespace, "." and "-".
+ */
+function slugify(text) {
+  const lowered = text.toLowerCase();
+  const cleaned = lowered.replace(/[^\w\s.-]/g, '');
+  const hyphenated = cleaned.replace(/\s+/g, '-');
+  return hyphenated.replace(/^-+|-+$/g, '');
+}
+
+/**
+ * Convert a section number into a sortable integer: major * 100 + minor.
+ * "-" and "/" are treated like "."; parts after the minor are ignored.
+ * Examples: "8" -> 800, "8.6" -> 806, "8-6" -> 806, "8/6" -> 806
+ * @param {string|number} sectionNum - Section number; falsy values yield 0
+ * @returns {number} Sort order; parts that are not decimal numbers count as 0
+ */
+function parseSectionOrder(sectionNum) {
+  if (!sectionNum) return 0;
+  // Explicit radix 10 so a token such as "0x1F" is not read as hexadecimal
+  const toInt = (part) => Number.parseInt(part, 10) || 0;
+  const [major = 0, minor = 0] = String(sectionNum).split(/[.\-\/]/).map(toInt);
+  return major * 100 + minor;
+}
+
+/**
+ * Extract sections from the PDF outline/bookmarks (not implemented yet).
+ * Reads and parses the file, logs any failure, and always resolves to null.
+ */
+async function extractFromOutline(pdfPath) {
+  try {
+    const pdfBytes = await readFile(pdfPath);
+    // NOTE(review): pdf-parse documents `max` as a page limit where 0 means "all pages" - confirm this skips text
+    const parsed = await pdf(pdfBytes, { max: 0 });
+    const info = parsed.metadata && parsed.metadata.info;
+
+    if (!info) {
+      return null;
+    }
+
+    // Outlines are not exposed by pdf-parse; pdf-lib or pdfjs-dist would be needed.
+    // Until then, report "nothing found" so the caller tries its other methods.
+    return null;
+  } catch (failure) {
+    console.error('[SectionExtractor] Outline extraction failed:', failure.message);
+    return null;
+  }
+}
+
+/**
+ * Detect at most one section header per page by trying regex patterns in order
+ * Target shapes include "8.6 Blackwater Tank", "CHAPTER 8: WASTE SYSTEMS" and all-caps lines.
+ * A page with usable text but no header, seen before any section exists, starts a default "Introduction".
+ * @param {Array} pages - Objects read as { pageNumber, text }; text shorter than 10 chars is skipped
+ * @returns {Array} Sections as { section, sectionKey, sectionOrder, startPage }
+ */
+function detectSectionHeaders(pages) {
+ const sections = [];
+ let currentSection = null;
+ let currentSectionOrder = 0;
+
+ // Patterns to match section headers (marine manual focused)
+ const headerPatterns = [
+ // "8.6 Blackwater Tank" or "8-6 Bilge System" or "8/6 Through-Hull"
+ /^\s*(\d+(?:[.\-\/]\d+)*)\s+([A-Z][^\n]{3,60})/m,
+ // "CHAPTER 8: WASTE SYSTEMS" or "SECTION 8.6: Blackwater"
+ /^\s*(?:CHAPTER|SECTION|PART)\s+(\d+(?:[.\-\/]\d+)*)[:\s]+([A-Z][^\n]{3,60})/mi,
+ // Marine-specific: "ELECTRICAL SYSTEM", "PLUMBING", "NAVIGATION EQUIPMENT"
+ /^\s*([A-Z][A-Z\s\-]{4,59})$/m,
+ // TOC style: "8.6 Blackwater" at the start of a line
+ /^(\d+(?:[.\-\/]\d+)*)\s+([A-Z][a-z][^\n]{3,50})/m,
+ ];
+
+ for (const page of pages) {
+ const { pageNumber, text } = page;
+
+ if (!text || text.length < 10) continue;
+
+ // Try each pattern in order; the first acceptable match wins for this page
+ let matched = false;
+ for (const pattern of headerPatterns) {
+ const match = text.match(pattern);
+ if (match) {
+ let sectionNum = match[1];
+ let sectionTitle = match[2] || match[1]; // the single-group (all-caps) pattern leaves match[2] undefined
+
+ // Title shorter than 5 characters: reject this match and try the next pattern
+ if (sectionTitle.length < 5) continue;
+
+ // Clean up title
+ sectionTitle = sectionTitle.trim();
+ if (sectionTitle.endsWith(':')) {
+ sectionTitle = sectionTitle.slice(0, -1);
+ }
+
+ // Section order: parsed from the number when it contains a digit, otherwise previous order + 1
+ const order = sectionNum && /\d/.test(sectionNum)
+ ? parseSectionOrder(sectionNum)
+ : currentSectionOrder + 1;
+
+ // Section key is the slugified title
+ const sectionKey = slugify(sectionTitle);
+
+ currentSection = {
+ section: sectionTitle,
+ sectionKey: sectionKey,
+ sectionOrder: order,
+ startPage: pageNumber
+ };
+
+ currentSectionOrder = order;
+ sections.push(currentSection);
+ matched = true;
+ break;
+ }
+ }
+
+ // If we found a section, continue to next page
+ if (matched) continue;
+
+ // No header on this page: nothing is recorded unless no section exists yet
+ if (!currentSection) {
+ // No section yet, create a default one
+ currentSection = {
+ section: 'Introduction',
+ sectionKey: 'introduction',
+ sectionOrder: 0,
+ startPage: pageNumber
+ };
+ sections.push(currentSection);
+ }
+ }
+
+ return sections;
+}
+
+/**
+ * Find the first page that looks like a Table of Contents and turn it into sections.
+ * A page qualifies when 3+ of its lines are shaped like "8.6 Title ........ 73".
+ * @param {Array} pages - Page objects; only `text` is read
+ * @returns {Array|null} Sections as { section, sectionKey, sectionOrder, startPage }, or null
+ */
+function parseTableOfContents(pages) {
+  // One TOC line: section number, title, dot/space leader, page number
+  const tocPattern = /^\s*(\d+(?:\.\d+)*)\s+([^.\d][^\n]{3,50}?)[\s.]+(\d+)\s*$/gm;
+  // Pages with fewer matching lines than this are not treated as a TOC
+  const MIN_ENTRIES = 3;
+
+  for (const { text } of pages) {
+    if (!text) continue;
+
+    const hits = Array.from(text.matchAll(tocPattern));
+    if (hits.length < MIN_ENTRIES) continue;
+
+    console.log(`[SectionExtractor] Found TOC page with ${hits.length} entries`);
+
+    const found = [];
+    for (const [, number, rawTitle, rawPage] of hits) {
+      const title = rawTitle.trim();
+      const startPage = parseInt(rawPage);
+      const usable = startPage > 0 && title.length >= 5;
+      if (!usable) continue;
+
+      found.push({
+        section: title,
+        sectionKey: slugify(title),
+        sectionOrder: parseSectionOrder(number),
+        startPage
+      });
+    }
+
+    // The first page that yields usable entries is taken as the TOC
+    if (found.length > 0) {
+      return found;
+    }
+  }
+
+  // No page produced a usable entry
+  return null;
+}
+
+/**
+ * Entry point: run the extraction methods in a fixed order and return the first
+ * non-empty result, or a single catch-all section when every method comes up empty.
+ * @param {string} pdfPath - Path handed to the outline extractor
+ * @param {Array} pages - Page objects handed to the TOC and header extractors
+ * @returns {Promise<Array>} Sections as { section, sectionKey, sectionOrder, startPage }
+ */
+export async function extractSections(pdfPath, pages) {
+  console.log('[SectionExtractor] Starting section extraction');
+
+  // Tried in this order; `report` builds the log line for a successful method
+  const methods = [
+    {
+      run: () => extractFromOutline(pdfPath),
+      report: (n) => `[SectionExtractor] Extracted ${n} sections from PDF outline`
+    },
+    {
+      run: () => parseTableOfContents(pages),
+      report: (n) => `[SectionExtractor] Extracted ${n} sections from TOC`
+    },
+    {
+      run: () => detectSectionHeaders(pages),
+      report: (n) => `[SectionExtractor] Detected ${n} sections from headers`
+    }
+  ];
+
+  for (const { run, report } of methods) {
+    const found = await run();
+    if (found && found.length > 0) {
+      console.log(report(found.length));
+      return found;
+    }
+  }
+
+  console.log('[SectionExtractor] No sections found, using single section');
+  return [{ section: 'Complete Manual', sectionKey: 'complete-manual', sectionOrder: 0, startPage: 1 }];
+}
+
+/**
+ * Build a page-number -> section lookup.
+ * Each section covers its startPage up to the page before the next section
+ * (in startPage order); the last section runs through totalPages.
+ * @param {Array} sections - Objects with section, sectionKey, sectionOrder, startPage
+ * @param {number} totalPages - Last page number assigned to the final section
+ * @returns {Map<number, Object>} pageNum -> { section, sectionKey, sectionOrder }
+ */
+export function mapPagesToSections(sections, totalPages) {
+  // Sort a copy so the caller's array order is left untouched
+  const byStartPage = Array
+    .from(sections)
+    .sort((left, right) => left.startPage - right.startPage);
+  const lookup = new Map();
+
+  byStartPage.forEach((current, index) => {
+    const following = byStartPage[index + 1];
+    const lastPage = following ? following.startPage - 1 : totalPages;
+    const { section, sectionKey, sectionOrder } = current;
+
+    // A fresh object per page, so map values never alias each other
+    let page = current.startPage;
+    while (page <= lastPage) {
+      lookup.set(page, { section, sectionKey, sectionOrder });
+      page += 1;
+    }
+  });
+
+  return lookup;
+}
diff --git a/server/services/toc-extractor.js b/server/services/toc-extractor.js
new file mode 100644
index 0000000..03a7f29
--- /dev/null
+++ b/server/services/toc-extractor.js
@@ -0,0 +1,591 @@
+/**
+ * TOC Extractor Service
+ * Detects and extracts Table of Contents from OCR'd document pages
+ */
+
+import { v4 as uuidv4 } from 'uuid';
+import { getDb } from '../db/db.js';
+import fs from 'fs/promises';
+import * as pdfjsLib from 'pdfjs-dist/legacy/build/pdf.mjs';
+
+/**
+ * Line-level TOC entry patterns; the page number is always the last capture group.
+ * - "Chapter 4 – Plumbing System ........ 72"
+ * - "4.1 Water System.....................45"
+ * - "Section 3: Electrical . . . . . . . 89"
+ * - "Introduction     12"
+ */
+const TOC_PATTERNS = [
+  // Pattern 1: Title [dots/spaces/dashes] PageNum (title is a lazy 3-150 character run)
+  /^(.{3,150}?)\s*[.\s–-]{3,}\s*(\d{1,4})\s*$/,
+
+  // Pattern 2: SectionKey Title [dots/spaces] PageNum
+  /^([\d.]+)\s+(.{3,100}?)\s*[.\s–-]{3,}\s*(\d{1,4})\s*$/,
+
+  // Pattern 3: Title [whitespace] PageNum (simpler)
+  /^(.{5,120}?)\s{3,}(\d{1,4})\s*$/,
+];
+
+/**
+ * Heuristically decide whether the OCR text of one page is a TOC page.
+ * @param {string} pageText - OCR text from page
+ * @returns {boolean}
+ */
+function isTocPage(pageText) {
+  if (!pageText || pageText.length < 100) return false;
+
+  // Only trimmed lines longer than 5 characters are considered
+  const candidates = pageText
+    .split('\n')
+    .map((raw) => raw.trim())
+    .filter((trimmed) => trimmed.length > 5);
+  if (candidates.length < 5) return false;
+
+  let hits = 0;
+  const pageNumbers = [];
+
+  for (const candidate of candidates) {
+    // The first pattern that fits decides; its last capture group is the page number
+    const pattern = TOC_PATTERNS.find((p) => p.test(candidate));
+    if (!pattern) continue;
+
+    hits += 1;
+    const captured = candidate.match(pattern);
+    const pageNum = parseInt(captured[captured.length - 1]);
+    if (!isNaN(pageNum)) pageNumbers.push(pageNum);
+  }
+
+  // A TOC page needs all three:
+  //   1. at least 5 matching lines
+  //   2. at least 30% of the considered lines matching
+  //   3. page numbers passing the sequentiality check
+  const enoughHits = hits >= 5;
+  const denseEnough = hits / candidates.length >= 0.3;
+  const sequential = checkSequentiality(pageNumbers);
+  return enoughHits && denseEnough && sequential;
+}
+
+/**
+ * Check whether page numbers, taken in the order they appear, mostly increase.
+ * The list is deliberately not sorted first: a sorted list never decreases,
+ * which would make this check pass for any input.
+ * @param {number[]} pageNumbers - Page numbers in the order they were found
+ * @returns {boolean} true when 3+ numbers are given and >= 70% of steps do not decrease
+ */
+function checkSequentiality(pageNumbers) {
+  if (pageNumbers.length < 3) return false;
+
+  // Count adjacent pairs, in original order, where the page number does not go down
+  let nonDecreasing = 0;
+  for (let i = 1; i < pageNumbers.length; i++) {
+    if (pageNumbers[i] >= pageNumbers[i - 1]) nonDecreasing++;
+  }
+
+  // At least 70% of the steps should be non-decreasing
+  return (nonDecreasing / (pageNumbers.length - 1)) >= 0.7;
+}
+
+/**
+ * Normalize a section key and derive its nesting depth.
+ * @param {string} sectionKey - e.g., "4", "4.1", "4.1.2"
+ * @returns {{ key: string, level: number }} Trimmed key and its number of dot-separated parts
+ *   ({ key: null, level: 1 } when the key is empty or missing)
+ */
+function parseSectionKey(sectionKey) {
+  if (!sectionKey) {
+    return { key: null, level: 1 };
+  }
+
+  const key = sectionKey.trim();
+  // Depth = number of dot-separated parts = number of dots + 1
+  const dotCount = (key.match(/\./g) || []).length;
+  return { key, level: dotCount + 1 };
+}
+
+/**
+ * Extract TOC entries from a page
+ * @param {string} pageText
+ * @param {number} pageNumber
+ * @returns {Array