"""Highlight result cache. Stores pre-computed highlighted chunk HTML pages in SQLite for instant retrieval. Uses sync sqlite3 — all operations are instant local reads/writes. Each method opens its own connection. """ import hashlib import json import logging import sqlite3 logger = logging.getLogger(__name__) def _connect(db_path: str) -> sqlite3.Connection: conn = sqlite3.connect(db_path) conn.row_factory = sqlite3.Row return conn def compute_cache_key(document_id: str, chunk_index: int, sub_question: str) -> str: """Deterministic cache key: sha256 hash of (document_id, chunk_index, sub_question).""" raw = f"{document_id}|{chunk_index}|{sub_question}" return hashlib.sha256(raw.encode("utf-8")).hexdigest() class HighlightCache: def __init__(self, db_path: str) -> None: self._db_path = db_path self._init_table() def _init_table(self) -> None: """Create table if not exists (idempotent).""" with _connect(self._db_path) as conn: conn.execute( """ CREATE TABLE IF NOT EXISTS chunk_highlights ( id INTEGER PRIMARY KEY AUTOINCREMENT, cache_key TEXT UNIQUE NOT NULL, document_id TEXT NOT NULL, chunk_index INTEGER NOT NULL, sub_question TEXT NOT NULL, relevant_sentences_json TEXT NOT NULL, html_content TEXT NOT NULL, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ) """ ) conn.execute( """ CREATE INDEX IF NOT EXISTS idx_highlights_cache_key ON chunk_highlights(cache_key) """ ) conn.commit() def get_highlight(self, cache_key: str) -> str | None: """Retrieve cached HTML content by cache key. Returns None if not found.""" with _connect(self._db_path) as conn: row = conn.execute( "SELECT html_content FROM chunk_highlights WHERE cache_key = ?", (cache_key,), ).fetchone() if row is None: return None return row["html_content"] def set_highlight( self, cache_key: str, document_id: str, chunk_index: int, sub_question: str, relevant_sentences_json: str, html_content: str, ) -> None: """Store highlighted HTML in cache. Overwrites existing entry.""" with _connect(self._db_path) as conn: conn.execute( """INSERT OR REPLACE INTO chunk_highlights (cache_key, document_id, chunk_index, sub_question, relevant_sentences_json, html_content) VALUES (?, ?, ?, ?, ?, ?)""", ( cache_key, document_id, chunk_index, sub_question, relevant_sentences_json, html_content, ), ) conn.commit()