"""Tests for Phase 5.4 HighlightCache — SQLite cache for highlighted chunk HTML.

Covers:
- set_highlight stores HTML and relevant_sentences_json
- get_highlight retrieves cached HTML by cache key
- get_highlight returns None for missing cache key
- set_highlight overwrites existing entry (INSERT OR REPLACE)
- compute_cache_key produces same key for same inputs, different for different sub_questions
- compute_cache_key is deterministic (same inputs → same hash)
- get_highlight after set_highlight returns exact HTML content (no corruption)
- table creation is idempotent (init twice doesn't crash)
- multiple instances on same DB file share data correctly

Uses tmp_path for isolated test databases — no real filesystem pollution.
"""
|
|
|
|
import json
|
|
|
|
import pytest
|
|
|
|
from app.services.highlight_cache import HighlightCache, compute_cache_key
|
|
|
|
|
|
# ── compute_cache_key ──────────────────────────────────────────────────────
|
|
|
|
|
|
def test_compute_cache_key_same_inputs_same_hash():
    """Two calls with identical arguments must yield the same key."""
    key1 = compute_cache_key("doc1", 0, "What is the budget?")
    key2 = compute_cache_key("doc1", 0, "What is the budget?")
    assert key1 == key2
|
|
|
|
|
|
def test_compute_cache_key_different_sub_question_different_hash():
    """Changing only the sub-question must change the key."""
    key1 = compute_cache_key("doc1", 0, "What is the budget?")
    key2 = compute_cache_key("doc1", 0, "Who proposed it?")
    assert key1 != key2
|
|
|
|
|
|
def test_compute_cache_key_different_document_id_different_hash():
    """Changing only the document id must change the key."""
    key1 = compute_cache_key("doc1", 0, "What is the budget?")
    key2 = compute_cache_key("doc2", 0, "What is the budget?")
    assert key1 != key2
|
|
|
|
|
|
def test_compute_cache_key_different_chunk_index_different_hash():
    """Changing only the chunk index must change the key."""
    key1 = compute_cache_key("doc1", 0, "What is the budget?")
    key2 = compute_cache_key("doc1", 1, "What is the budget?")
    assert key1 != key2
|
|
|
|
|
|
def test_compute_cache_key_is_64_char_hex():
    """The key must be exactly 64 lowercase hexadecimal characters."""
    key = compute_cache_key("doc1", 0, "What is the budget?")
    assert len(key) == 64
    # Lowercase digits only: an uppercase hex digit would fail this check.
    assert all(c in "0123456789abcdef" for c in key)
|
|
|
|
|
|
# ── HighlightCache basic CRUD ──────────────────────────────────────────────
|
|
|
|
|
|
def test_set_and_get_highlight(tmp_path):
    """Store one highlight and read its HTML back by cache key."""
    db_path = str(tmp_path / "highlights.db")
    cache = HighlightCache(db_path)

    cache.set_highlight(
        cache_key="abc123",
        document_id="doc1",
        chunk_index=0,
        sub_question="What is the budget?",
        relevant_sentences_json=json.dumps([0, 1, 2]),
        html_content="<p>highlighted</p>",
    )

    result = cache.get_highlight("abc123")
    assert result == "<p>highlighted</p>"
|
|
|
|
|
|
def test_get_highlight_missing_returns_none(tmp_path):
    """Looking up a key that was never stored must return None."""
    db_path = str(tmp_path / "highlights.db")
    cache = HighlightCache(db_path)

    result = cache.get_highlight("nonexistent")
    assert result is None
|
|
|
|
|
|
def test_set_highlight_overwrites_existing(tmp_path):
    """A second set_highlight under the same cache key replaces the first HTML."""
    db_path = str(tmp_path / "highlights.db")
    cache = HighlightCache(db_path)

    cache.set_highlight(
        cache_key="abc123",
        document_id="doc1",
        chunk_index=0,
        sub_question="What is the budget?",
        relevant_sentences_json=json.dumps([0, 1]),
        html_content="<p>first</p>",
    )

    # Same cache_key, different payload: the later write must win.
    cache.set_highlight(
        cache_key="abc123",
        document_id="doc1",
        chunk_index=0,
        sub_question="What is the budget?",
        relevant_sentences_json=json.dumps([2, 3]),
        html_content="<p>second</p>",
    )

    result = cache.get_highlight("abc123")
    assert result == "<p>second</p>"
|
|
|
|
|
|
def test_get_highlight_returns_exact_html_no_corruption(tmp_path):
    """Multi-line HTML with quotes and newlines must round-trip unchanged."""
    db_path = str(tmp_path / "highlights.db")
    cache = HighlightCache(db_path)

    # Embedded double quotes, newlines and leading spaces exercise the
    # storage layer's handling of non-trivial text.
    html = (
        '<div class="chunk">\n'
        ' <p>Line one</p>\n'
        ' <mark>Line two</mark>\n'
        ' <p>Line three</p>\n'
        '</div>'
    )

    cache.set_highlight(
        cache_key="key1",
        document_id="doc1",
        chunk_index=0,
        sub_question="Q?",
        relevant_sentences_json=json.dumps([1]),
        html_content=html,
    )

    result = cache.get_highlight("key1")
    assert result == html
|
|
|
|
|
|
# ── Table init idempotency ─────────────────────────────────────────────────
|
|
|
|
|
|
def test_init_table_is_idempotent(tmp_path):
    """Constructing a second HighlightCache on an existing DB keeps its data."""
    db_path = str(tmp_path / "highlights.db")
    cache1 = HighlightCache(db_path)
    cache1.set_highlight(
        cache_key="k1",
        document_id="d1",
        chunk_index=0,
        sub_question="Q1",
        relevant_sentences_json="[]",
        html_content="<p>hi</p>",
    )

    # Second init on same DB should not crash
    cache2 = HighlightCache(db_path)
    assert cache2.get_highlight("k1") == "<p>hi</p>"
|
|
|
|
|
|
# ── Multiple instances share data ──────────────────────────────────────────
|
|
|
|
|
|
def test_multiple_instances_share_same_db(tmp_path):
    """A write through one instance is readable through another on the same file."""
    db_path = str(tmp_path / "highlights.db")
    # Both instances are created before the write, so cache2 must see data
    # stored after its own construction.
    cache1 = HighlightCache(db_path)
    cache2 = HighlightCache(db_path)

    cache1.set_highlight(
        cache_key="shared",
        document_id="doc1",
        chunk_index=0,
        sub_question="Q?",
        relevant_sentences_json="[]",
        html_content="<p>shared</p>",
    )

    assert cache2.get_highlight("shared") == "<p>shared</p>"
|
|
|
|
|
|
# ── compute_cache_key integration with cache ───────────────────────────────
|
|
|
|
|
|
def test_compute_cache_key_and_round_trip(tmp_path):
    """A key from compute_cache_key works as the storage and lookup key."""
    db_path = str(tmp_path / "highlights.db")
    cache = HighlightCache(db_path)

    key = compute_cache_key("doc_42", 7, "What was the total spending?")
    cache.set_highlight(
        cache_key=key,
        document_id="doc_42",
        chunk_index=7,
        sub_question="What was the total spending?",
        relevant_sentences_json=json.dumps([3, 4]),
        html_content="<mark>total</mark>",
    )

    assert cache.get_highlight(key) == "<mark>total</mark>"
|
|
|
|
|
|
def test_different_sub_questions_produce_different_cache_entries(tmp_path):
    """Two sub-questions on the same chunk are cached as independent entries."""
    db_path = str(tmp_path / "highlights.db")
    cache = HighlightCache(db_path)

    # Same document and chunk; only the sub-question differs.
    key1 = compute_cache_key("doc1", 0, "What is the budget?")
    key2 = compute_cache_key("doc1", 0, "Who proposed it?")

    cache.set_highlight(
        cache_key=key1,
        document_id="doc1",
        chunk_index=0,
        sub_question="What is the budget?",
        relevant_sentences_json="[0]",
        html_content="<p>budget</p>",
    )

    cache.set_highlight(
        cache_key=key2,
        document_id="doc1",
        chunk_index=0,
        sub_question="Who proposed it?",
        relevant_sentences_json="[1]",
        html_content="<p>proposer</p>",
    )

    # Neither write may clobber the other.
    assert cache.get_highlight(key1) == "<p>budget</p>"
    assert cache.get_highlight(key2) == "<p>proposer</p>"
|