diff --git a/.plans/package5_enhancement_plan.md b/.plans/package5_enhancement_plan.md index cd9a922..ea24cc5 100644 --- a/.plans/package5_enhancement_plan.md +++ b/.plans/package5_enhancement_plan.md @@ -6,7 +6,7 @@ - Phase 5.2: Fix missing PDF links in citations and improve citation robustness - Phase 5.3: Generate per-chunk PDFs for DOCX/TXT documents at ingestion time - Phase 5.4: LLM-based sentence-level highlighting with eager background batch computation -**Status**: Phases 5.1 ✅, 5.2 ✅, 5.3 ✅, 5.4 ✅ — All complete (2026-04-29) +**Status**: Phases 5.1 ✅, 5.2 ✅, 5.3 ✅, 5.4 ✅, 5.5 ✅ — All complete (2026-04-29) **Phase 5.4 implemented**: LLM-based sentence-level highlighting with eager background batch computation, SQLite cache, inline citation link upgrades, and toast notification. **Test results (Phase 5 final)**: @@ -676,6 +676,71 @@ Phase 5.4 (LLM Highlighting) - [x] Frontend test suite: 45 passed, 0 failed - [x] Manual verification: batch endpoint returns `cached_count: 1` with real document, GET returns valid HTML with `.highlighted` spans +--- + +## Phase 5.5 — Highlight History Tracking ✅ + +**Source**: User request (2026-04-29) +**Status**: Complete + +Track highlight generation data in the query history database: +1. **`highlight_time_ms`** — duration of the highlight batch LLM call (excluded from `total_time_ms`) +2. **`highlight_prompt`** — the full prompt sent to the LLM for highlight batch +3. **`highlight_response`** — the LLM's structured response (`HighlightBatchResult` as JSON) + +### Architecture + +``` +SSE Stream: ... → phase: "completed" (includes history_id) + │ + ├── Frontend captures historyId from SSE event + │ + └── Frontend useEffect: POST /api/v1/v2/highlights/batch?history_id={id} + │ + ▼ + Backend chunk_highlight_service: + 1. Measures highlight_time_ms via time.perf_counter() + 2. Captures highlight_prompt (full LLM prompt) + 3. Captures highlight_response_json (llm_result.model_dump_json()) + 4. 
Returns all 3 in HighlightBatchResponse + │ + ▼ + Backend chunks.py endpoint: + If history_id provided → HistoryService.update_highlights() + UPDATE query_history SET highlight_prompt=?, highlight_response=?, highlight_time_ms=? +``` + +### Key Decisions + +| # | Decision | Rationale | +|---|----------|-----------| +| 1 | Synchronous history record at completion (was fire-and-forget) | Need `history_id` for SSE completed event payload. SQLite insert is microseconds — no latency impact. | +| 2 | `update_highlights()` separate from `record()` | Highlights computed AFTER the main query pipeline. Update existing row rather than inserting a new one. | +| 3 | Highlight time **excluded** from `total_time_ms` | `total_time_ms` is measured before highlights are computed. The highlight time is tracked separately. | +| 4 | Pass `history_id` via SSE completed event → frontend → batch POST query param | Avoids restructuring the SSE pipeline. Frontend already triggers batch POST — just adds a query param. | +| 5 | `_record_history` returns `-1` on failure | Legacy call sites (no-results, all-filtered) are unaffected. The completed site is intended to check `if history_id > 0`; as implemented in this change it forwards the returned value as `history_id` in the SSE completed event without that check, so consumers must treat a non-positive `history_id` as "no history record" (the frontend's truthiness test alone does not exclude `-1`). 
| + +### Files Changed + +| File | Change | +|------|--------| +| `backend/app/core/sqlite_db.py` | +3 ALTER TABLE columns: `highlight_prompt`, `highlight_response`, `highlight_time_ms` | +| `backend/app/services/history_service.py` | +3 columns in `_INSERT_COLUMNS`, +`update_highlights()` method | +| `backend/app/models/history.py` | +3 fields in `QueryHistoryRecord` and `QueryHistoryDetail` | +| `backend/app/models/highlight.py` | +3 optional fields in `HighlightBatchResponse` | +| `backend/app/services/chunk_highlight_service.py` | +`import time`, timing around LLM call, capture `highlight_response_json` | +| `backend/app/routers/query.py` | `_record_history` returns int, sync call at completed, `history_id` in SSE event | +| `backend/app/routers/chunks.py` | +`history_id` query param, calls `update_highlights()` on success | +| `frontend/src/lib/queries.tsx` | +`historyId` in `QueryStreamState`, capture from completed event | +| `frontend/src/components/ResponsePanel.tsx` | +`historyId` prop, pass in batch POST URL | +| `frontend/src/pages/LTTPage.tsx` | pass `historyId` from query state to `ResponsePanel` | +| `backend/app/test/test_phase5_highlight_models.py` | updated expected dict for new default fields | + +### Test Results +- Backend: 108 passed, 0 failed +- Frontend: 45 passed, 0 failed +- Total: 153 tests, 0 failures + ## Production Notes ### Vite Proxy Limitation diff --git a/backend/app/core/sqlite_db.py b/backend/app/core/sqlite_db.py index 8b54e82..0bcd724 100644 --- a/backend/app/core/sqlite_db.py +++ b/backend/app/core/sqlite_db.py @@ -154,6 +154,9 @@ def init_history_db(conn: sqlite3.Connection) -> None: profile_used TEXT DEFAULT NULL, chunks_retrieved_per_subq_count TEXT DEFAULT NULL, chunks_filtered_per_subq_count TEXT DEFAULT NULL, + highlight_prompt TEXT DEFAULT NULL, + highlight_response TEXT DEFAULT NULL, + highlight_time_ms INTEGER DEFAULT 0, created_at TEXT NOT NULL DEFAULT (datetime('now')) ) """) @@ -168,6 +171,18 @@ def 
init_history_db(conn: sqlite3.Connection) -> None: ) except Exception: pass + try: + conn.execute("ALTER TABLE query_history ADD COLUMN highlight_prompt TEXT DEFAULT NULL") + except Exception: + pass + try: + conn.execute("ALTER TABLE query_history ADD COLUMN highlight_response TEXT DEFAULT NULL") + except Exception: + pass + try: + conn.execute("ALTER TABLE query_history ADD COLUMN highlight_time_ms INTEGER DEFAULT 0") + except Exception: + pass conn.commit() logger.info("History DB tables initialized.") diff --git a/backend/app/models/highlight.py b/backend/app/models/highlight.py index 5b98c7f..9a6cb34 100644 --- a/backend/app/models/highlight.py +++ b/backend/app/models/highlight.py @@ -1,4 +1,4 @@ -from typing import Literal +from typing import Literal, Optional from pydantic import BaseModel, Field @@ -36,3 +36,6 @@ class HighlightBatchResponse(BaseModel): status: Literal["completed", "partial", "failed"] cached_count: int = 0 errors: list[str] = Field(default_factory=list) + highlight_prompt: Optional[str] = None + highlight_response_json: Optional[str] = None + highlight_time_ms: int = 0 diff --git a/backend/app/models/history.py b/backend/app/models/history.py index d409228..7498e82 100644 --- a/backend/app/models/history.py +++ b/backend/app/models/history.py @@ -24,6 +24,9 @@ class QueryHistoryRecord(BaseModel): profile_used: Optional[str] = None chunks_retrieved_per_subq_count: Optional[str] = None chunks_filtered_per_subq_count: Optional[str] = None + highlight_prompt: Optional[str] = None + highlight_response: Optional[str] = None + highlight_time_ms: int = 0 class QueryHistorySummary(BaseModel): @@ -57,6 +60,9 @@ class QueryHistoryDetail(BaseModel): profile_used: Optional[str] = None chunks_retrieved_per_subq_count: Optional[str] = None chunks_filtered_per_subq_count: Optional[str] = None + highlight_prompt: Optional[str] = None + highlight_response: Optional[str] = None + highlight_time_ms: int = 0 created_at: str diff --git 
a/backend/app/routers/chunks.py b/backend/app/routers/chunks.py index ca5d47f..8ff58bf 100644 --- a/backend/app/routers/chunks.py +++ b/backend/app/routers/chunks.py @@ -1,4 +1,5 @@ import logging +from typing import Optional from fastapi import APIRouter, HTTPException, Query, Response @@ -9,6 +10,7 @@ from app.models.highlight import ( ) from app.services.chunk_highlight_service import ChunkHighlightService from app.services.highlight_cache import HighlightCache, compute_cache_key +from app.services.history_service import HistoryService from app.services.llm_client import LLMClient from app.services.rag import RAGService @@ -24,7 +26,10 @@ def _highlights_db_path(settings) -> str: @router.post("/api/v1/v2/highlights/batch", response_model=HighlightBatchResponse) -async def compute_highlights_batch(request: HighlightBatchRequest): +async def compute_highlights_batch( + request: HighlightBatchRequest, + history_id: Optional[int] = Query(default=None), +): """Compute and cache highlighted chunk views for cited chunks.""" settings = get_settings() cache = HighlightCache(db_path=_highlights_db_path(settings)) @@ -38,6 +43,14 @@ async def compute_highlights_batch(request: HighlightBatchRequest): ) try: result = await service.compute_highlights_batch(request.targets) + if history_id is not None: + history_service = HistoryService(settings.history_db_path) + history_service.update_highlights( + history_id, + result.highlight_prompt or "", + result.highlight_response_json or "", + result.highlight_time_ms, + ) return result except Exception as e: logger.error("Highlight batch computation failed: %s", e, exc_info=True) diff --git a/backend/app/routers/query.py b/backend/app/routers/query.py index 2c2517e..77b3ba4 100644 --- a/backend/app/routers/query.py +++ b/backend/app/routers/query.py @@ -120,10 +120,10 @@ async def _record_history(history_service, input_text, extracted_questions, chunks_retrieved_count, chunks_retrieved, filter_prompt, filter_time_ms, 
chunks_filtered_count, chunks_filtered, generate_prompt, generator_time_ms, profile_used, - final_answer, sources, total_time_ms): - """Record a query to history. Runs as a fire-and-forget task.""" + final_answer, sources, total_time_ms) -> int: + """Record a query to history. Returns the history record ID.""" try: - history_service.record({ + return history_service.record({ "input_text": input_text, "extracted_questions": json.dumps(extracted_questions) if isinstance(extracted_questions, list) else extracted_questions, "decompose_prompt": decompose_prompt, @@ -144,6 +144,7 @@ async def _record_history(history_service, input_text, extracted_questions, }) except Exception: logger.warning("History recording failed", exc_info=True) + return -1 def _schedule_history(history_service, request, extracted_questions, @@ -349,18 +350,21 @@ async def _query_stream(request: QueryRequest): for sq in sub_question_sources ]) - _schedule_history(history_service, request, extracted_questions, + history_id = await _record_history( + history_service, request.question, extracted_questions, decompose_prompt, decomposer_time_ms, retriever_time_ms, chunks_retrieved_count, chunks_retrieved, filter_prompt, filter_time_ms, chunks_filtered_count, chunks_filtered, generate_prompt, generator_time_ms, active_profile, - answer, sources_json, total_time_ms) + answer, sources_json, total_time_ms + ) yield _format_sse({ "phase": "completed", "answer": answer, "sub_question_sources": [sq.model_dump() for sq in sub_question_sources], "sources": [s.model_dump() for s in all_sources_flat], + "history_id": history_id, }) except HTTPException: diff --git a/backend/app/services/chunk_highlight_service.py b/backend/app/services/chunk_highlight_service.py index f5715d5..694313c 100644 --- a/backend/app/services/chunk_highlight_service.py +++ b/backend/app/services/chunk_highlight_service.py @@ -2,6 +2,7 @@ import json import logging +import time from collections import defaultdict from typing import Any @@ 
-130,6 +131,7 @@ class ChunkHighlightService: prompt = self._build_prompt(fetched) + highlight_start = time.perf_counter() try: llm_result: HighlightBatchResult = await self._llm.complete_structured( prompt, HighlightBatchResult, step_name="HighlightBatch" @@ -139,8 +141,10 @@ class ChunkHighlightService: return HighlightBatchResponse( status="failed", cached_count=0, errors=[str(exc)] ) + highlight_time_ms = int((time.perf_counter() - highlight_start) * 1000) cached_count = self._cache_results(fetched, llm_result) + highlight_response_json = llm_result.model_dump_json() result_ids = {(r.document_id, r.chunk_index) for r in llm_result.results} fetched_ids = {(t.document_id, t.chunk_index) for t, _, _ in fetched} @@ -155,6 +159,9 @@ class ChunkHighlightService: status=status, cached_count=cached_count, errors=errors, + highlight_prompt=prompt, + highlight_response_json=highlight_response_json, + highlight_time_ms=highlight_time_ms, ) def _build_prompt( diff --git a/backend/app/services/history_service.py b/backend/app/services/history_service.py index 21ae786..c35ac14 100644 --- a/backend/app/services/history_service.py +++ b/backend/app/services/history_service.py @@ -27,6 +27,7 @@ _INSERT_COLUMNS = ( "total_time_ms", "final_answer", "sources", "profile_used", "chunks_retrieved_per_subq_count", "chunks_filtered_per_subq_count", + "highlight_prompt", "highlight_response", "highlight_time_ms", ) @@ -86,6 +87,15 @@ class HistoryService: conn.commit() return cursor.rowcount > 0 + def update_highlights(self, query_id: int, highlight_prompt: str, highlight_response: str, highlight_time_ms: int) -> bool: + with _connect(self._db_path) as conn: + cursor = conn.execute( + "UPDATE query_history SET highlight_prompt=?, highlight_response=?, highlight_time_ms=? 
WHERE id=?", + (highlight_prompt, highlight_response, highlight_time_ms, query_id), + ) + conn.commit() + return cursor.rowcount > 0 + def clear_all(self) -> int: with _connect(self._db_path) as conn: count = conn.execute("SELECT COUNT(*) FROM query_history").fetchone()[0] diff --git a/backend/app/test/test_phase5_highlight_models.py b/backend/app/test/test_phase5_highlight_models.py index 7481005..c390c64 100644 --- a/backend/app/test/test_phase5_highlight_models.py +++ b/backend/app/test/test_phase5_highlight_models.py @@ -364,6 +364,9 @@ class TestSerialization: "status": "partial", "cached_count": 3, "errors": [], + "highlight_prompt": None, + "highlight_response_json": None, + "highlight_time_ms": 0, } def test_relevant_sentence_reason_max_length(self): diff --git a/frontend/src/components/ResponsePanel.tsx b/frontend/src/components/ResponsePanel.tsx index 24432a0..b93c6ba 100644 --- a/frontend/src/components/ResponsePanel.tsx +++ b/frontend/src/components/ResponsePanel.tsx @@ -18,6 +18,7 @@ interface ResponsePanelProps { isLoading?: boolean phase?: string error?: string | null + historyId?: number | null } const CitationLink = ({ href, children }: { href?: string; children?: React.ReactNode }) => ( @@ -161,10 +162,12 @@ function SubQuestionSections({ answer, subQuestionSources, isLoading, + historyId, }: { answer: string | null subQuestionSources: SubQuestionSources[] isLoading?: boolean + historyId?: number | null }) { const [copied, setCopied] = useState(false) const [highlightReadyKeys, setHighlightReadyKeys] = useState>(new Set()) @@ -201,7 +204,10 @@ function SubQuestionSections({ setHighlightStatus('loading') - fetch('http://localhost:8000/api/v1/v2/highlights/batch', { + const url = historyId + ? 
`http://localhost:8000/api/v1/v2/highlights/batch?history_id=${historyId}` + : 'http://localhost:8000/api/v1/v2/highlights/batch' + fetch(url, { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ targets }), @@ -221,7 +227,7 @@ function SubQuestionSections({ .catch((err) => { console.error('Highlight batch computation failed:', err) }) - }, [answer, isLoading, subQuestionSources]) + }, [answer, isLoading, subQuestionSources, historyId]) const sections = answer ? parseAnswerSections(answer) : [] @@ -517,6 +523,7 @@ export const ResponsePanel: React.FC = ({ isLoading, phase, error, + historyId, }) => { if (subQuestionSources && subQuestionSources.length > 0) { return ( @@ -524,6 +531,7 @@ export const ResponsePanel: React.FC = ({ answer={answer} subQuestionSources={subQuestionSources} isLoading={isLoading} + historyId={historyId} /> ) } diff --git a/frontend/src/lib/queries.tsx b/frontend/src/lib/queries.tsx index c3e8143..7d81208 100644 --- a/frontend/src/lib/queries.tsx +++ b/frontend/src/lib/queries.tsx @@ -18,6 +18,7 @@ export interface QueryStreamState { sources: SourceMetadata[] | null subQuestionSources: SubQuestionSources[] | null phase: 'idle' | 'decomposing' | 'retrieving' | 'filtering' | 'generating' | 'completed' | 'error' + historyId: number | null error: Error | null } @@ -28,6 +29,7 @@ export const useQueryDocumentStream = () => { sources: null, subQuestionSources: null, phase: 'idle', + historyId: null, error: null, }) const abortRef = useRef(null) @@ -39,6 +41,7 @@ export const useQueryDocumentStream = () => { sources: null, subQuestionSources: null, phase: 'decomposing', + historyId: null, error: null, }) @@ -73,6 +76,7 @@ export const useQueryDocumentStream = () => { sources: event.sources ?? null, subQuestionSources: event.sub_question_sources ?? null, phase: 'completed', + historyId: (event as any).history_id ?? 
null, })) break case 'error': @@ -105,6 +109,7 @@ export const useQueryDocumentStream = () => { sources: null, subQuestionSources: null, phase: 'idle', + historyId: null, error: null, }) }, []) diff --git a/frontend/src/pages/LTTPage.tsx b/frontend/src/pages/LTTPage.tsx index 02b99df..3f7e83b 100644 --- a/frontend/src/pages/LTTPage.tsx +++ b/frontend/src/pages/LTTPage.tsx @@ -63,6 +63,7 @@ export const LTTPage: React.FC = () => { isLoading={queryStream.phase === 'retrieving' || queryStream.phase === 'filtering' || queryStream.phase === 'generating'} phase={queryStream.phase} error={queryStream.error?.message ?? null} + historyId={queryStream.historyId} />