navidocs/server/migrations/004_add_document_images.sql
ggq-admin 4b91896838 feat: Add image extraction design, database schema, and migration
- Comprehensive image extraction architecture design
- Database schema for document_images table
- Migration 004: Add document_images table with indexes
- Migration runner script
- Design and status documentation

Prepares foundation for image extraction feature with OCR on images.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
2025-10-19 19:47:30 +02:00

30 lines
1.3 KiB
SQL

-- Migration: Add support for extracted images from PDFs
-- Date: 2025-10-19
-- Purpose: Store extracted images, their OCR text, and relationships to document text
-- document_images: one row per image extracted from a page of a document.
-- A given (documentId, pageNumber, imageIndex) triple may appear only once.
--
-- Because of IF NOT EXISTS, a database that already contains document_images
-- keeps its existing definition; this statement only shapes fresh databases.
CREATE TABLE IF NOT EXISTS document_images (
    -- NOT NULL is spelled out on purpose: in an ordinary SQLite rowid table a
    -- non-INTEGER PRIMARY KEY column otherwise accepts NULL values.
    id               TEXT    PRIMARY KEY NOT NULL,
    documentId       TEXT    NOT NULL,       -- owning row in documents(id), see FOREIGN KEY below
    pageNumber       INTEGER NOT NULL,
    imageIndex       INTEGER NOT NULL,       -- tells apart several images on the same page
    imagePath        TEXT    NOT NULL,
    imageFormat      TEXT    DEFAULT 'png',  -- nullable; 'png' is used only when the column is omitted
    width            INTEGER,
    height           INTEGER,
    position         TEXT,                   -- JSON: {x, y, width, height} in PDF coordinates
    extractedText    TEXT,                   -- OCR text from the image itself
    textConfidence   REAL,                   -- Average OCR confidence (0-1); range is not enforced here
    anchorTextBefore TEXT,                   -- Text snippet appearing before the image
    anchorTextAfter  TEXT,                   -- Text snippet appearing after the image
    createdAt        INTEGER NOT NULL,       -- NOTE(review): epoch unit (s vs ms) is not visible here; confirm with the writer
    -- Deleting a document deletes its image rows. SQLite only enforces this
    -- when foreign keys are enabled on the connection (PRAGMA foreign_keys = ON).
    FOREIGN KEY (documentId) REFERENCES documents(id) ON DELETE CASCADE,
    UNIQUE (documentId, pageNumber, imageIndex)
);
-- Secondary indexes on document_images. Each statement is a no-op when an
-- index of that name already exists.
--
-- NOTE(review): the table's UNIQUE(documentId, pageNumber, imageIndex)
-- constraint is already backed by an implicit index whose leading columns are
-- (documentId) and (documentId, pageNumber), so the first two indexes below
-- look redundant. They are kept so the index names stay available to anything
-- that references them; confirm before dropping.

-- Lookup of every image belonging to one document.
CREATE INDEX IF NOT EXISTS idx_document_images_doc
    ON document_images (documentId);

-- Lookup of the images on a single page of a document.
CREATE INDEX IF NOT EXISTS idx_document_images_page
    ON document_images (documentId, pageNumber);

-- Filtering or ordering by creation time.
CREATE INDEX IF NOT EXISTS idx_document_images_created
    ON document_images (createdAt);
-- Extend documents with per-document image-extraction bookkeeping.
-- Existing rows read back the default 0 for both new columns.
--
-- Unlike the CREATE ... IF NOT EXISTS statements in this migration, these two
-- statements are not re-runnable: SQLite has no ADD COLUMN IF NOT EXISTS, so a
-- second execution fails with a "duplicate column name" error. The migration
-- runner must apply this file exactly once.

-- Tracks whether images have been extracted for the document (0 by default).
ALTER TABLE documents
    ADD COLUMN imagesExtracted INTEGER DEFAULT 0;

-- Presumably the number of images extracted for the document; confirm with the writer.
ALTER TABLE documents
    ADD COLUMN imageCount INTEGER DEFAULT 0;