legco_ai_assistant/backend/app/services/highlight_cache.py

95 lines
3.1 KiB
Python

"""Highlight result cache.
Stores pre-computed highlighted chunk HTML pages in SQLite for instant retrieval.
Uses sync sqlite3 — all operations are instant local reads/writes.
Each method opens its own connection.
"""
import hashlib
import json
import logging
import sqlite3
# Module-level logger named after this module (``__name__``).
logger = logging.getLogger(__name__)
def _connect(db_path: str) -> sqlite3.Connection:
    """Open a SQLite connection to *db_path* with name-addressable rows.

    The connection's ``row_factory`` is set to ``sqlite3.Row`` so result
    columns can be read as ``row["column"]``. The connection is returned
    open; the caller is responsible for closing it.
    """
    connection = sqlite3.connect(db_path)
    connection.row_factory = sqlite3.Row
    return connection
def compute_cache_key(document_id: str, chunk_index: int, sub_question: str) -> str:
    """Return the deterministic cache key for one highlighted chunk.

    The three arguments are joined with ``|`` and the UTF-8 encoding of that
    string is hashed with SHA-256; the hex digest is the key.
    """
    payload = "{}|{}|{}".format(document_id, chunk_index, sub_question)
    hasher = hashlib.sha256()
    hasher.update(payload.encode("utf-8"))
    return hasher.hexdigest()
class HighlightCache:
    """SQLite-backed cache of highlighted chunk HTML, looked up by cache key.

    Every method opens its own connection through ``_connect`` and closes it
    before returning, so no SQLite handle outlives a single call.
    """

    def __init__(self, db_path: str) -> None:
        """Store *db_path* and make sure the ``chunk_highlights`` table exists."""
        self._db_path = db_path
        self._init_table()

    def _init_table(self) -> None:
        """Create the table and its index if they do not exist (idempotent)."""
        conn = _connect(self._db_path)
        try:
            # ``with conn`` only commits (or rolls back on error); it does NOT
            # close the connection, hence the surrounding try/finally.
            with conn:
                conn.execute(
                    """
                    CREATE TABLE IF NOT EXISTS chunk_highlights (
                        id INTEGER PRIMARY KEY AUTOINCREMENT,
                        cache_key TEXT UNIQUE NOT NULL,
                        document_id TEXT NOT NULL,
                        chunk_index INTEGER NOT NULL,
                        sub_question TEXT NOT NULL,
                        relevant_sentences_json TEXT NOT NULL,
                        html_content TEXT NOT NULL,
                        created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                    )
                    """
                )
                # NOTE: the UNIQUE constraint on cache_key already gives
                # SQLite an index for lookups; this explicit index is kept so
                # the schema matches databases created by earlier versions.
                conn.execute(
                    """
                    CREATE INDEX IF NOT EXISTS idx_highlights_cache_key
                    ON chunk_highlights(cache_key)
                    """
                )
        finally:
            conn.close()

    def get_highlight(self, cache_key: str) -> str | None:
        """Return the cached HTML for *cache_key*, or ``None`` if absent."""
        conn = _connect(self._db_path)
        try:
            row = conn.execute(
                "SELECT html_content FROM chunk_highlights WHERE cache_key = ?",
                (cache_key,),
            ).fetchone()
        finally:
            conn.close()
        if row is None:
            return None
        return row["html_content"]

    def set_highlight(
        self,
        cache_key: str,
        document_id: str,
        chunk_index: int,
        sub_question: str,
        relevant_sentences_json: str,
        html_content: str,
    ) -> None:
        """Store highlighted HTML under *cache_key*, replacing any existing row.

        Because the statement is ``INSERT OR REPLACE``, an existing row with
        the same ``cache_key`` is removed and a new one inserted, so its
        ``id`` and ``created_at`` are regenerated.
        """
        conn = _connect(self._db_path)
        try:
            # Commit on success, roll back if the insert raises.
            with conn:
                conn.execute(
                    """INSERT OR REPLACE INTO chunk_highlights
                    (cache_key, document_id, chunk_index, sub_question,
                     relevant_sentences_json, html_content)
                    VALUES (?, ?, ?, ?, ?, ?)""",
                    (
                        cache_key,
                        document_id,
                        chunk_index,
                        sub_question,
                        relevant_sentences_json,
                        html_content,
                    ),
                )
        finally:
            conn.close()