From e49a68b0bdfbef454f661542c376926ac747e617 Mon Sep 17 00:00:00 2001 From: Woody Date: Sat, 25 Apr 2026 21:11:17 +0800 Subject: [PATCH] =?UTF-8?q?feat(prompts):=20Phase=203.2=20=E2=80=94=20Prom?= =?UTF-8?q?pt=20Backend=20(CRUD=20service,=20REST=20API,=2033=20tests)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - PromptService (services/prompt_service.py): full CRUD for 3 profiles A/B/C with seed template reset, validation, and sqlite3.Row access - REST API (routers/prompts.py): 6 endpoints on /api/v1/prompts - Pydantic models (models/prompts.py): 6 schemas - DI wiring (dependencies.py): get_prompt_service() - App registration (main.py): prompts router - Mock fixture (conftest.py): mock_prompt_service - Tests: test_phase3_prompt_service.py (22) + test_phase3_prompts_router.py (11) - 162/166 total pass, 4 skipped, 0 fail --- .plans/package3_enhancement_plan.md | 1001 +++++++++++++++++ backend/app/core/dependencies.py | 6 + backend/app/main.py | 3 +- backend/app/models/prompts.py | 29 + backend/app/routers/prompts.py | 81 ++ backend/app/services/prompt_service.py | 168 +++ backend/app/test/conftest.py | 38 + .../app/test/test_phase3_prompt_service.py | 248 ++++ .../app/test/test_phase3_prompts_router.py | 216 ++++ 9 files changed, 1789 insertions(+), 1 deletion(-) create mode 100644 .plans/package3_enhancement_plan.md create mode 100644 backend/app/models/prompts.py create mode 100644 backend/app/routers/prompts.py create mode 100644 backend/app/services/prompt_service.py create mode 100644 backend/app/test/test_phase3_prompt_service.py create mode 100644 backend/app/test/test_phase3_prompts_router.py diff --git a/.plans/package3_enhancement_plan.md b/.plans/package3_enhancement_plan.md new file mode 100644 index 0000000..58d948d --- /dev/null +++ b/.plans/package3_enhancement_plan.md @@ -0,0 +1,1001 @@ +# Package 3 Enhancement Plan + +**Source**: User request (2026-04-25) +**Scope**: System Prompt Configuration Page + 
Query History Page +**Status**: 🔧 In Progress (3.1 ✅, 3.2 ✅, 3.3 in progress) + +--- + +## Objective + +Add two new features that give users visibility and control over the RAG pipeline: + +1. **System Prompt Configuration Page** — Users can view/edit the full prompt templates for all 3 LLM calls (Decomposer, Relevance Filter, Response Generator). Templates support placeholders (`{question}`, `{chunks}`, `{context}`) that are replaced at query time. Supports 3 profiles (A, B, C) that users switch between with a single click. + +2. **Query History Page** — Records every query with full detail: input text, extracted questions, timing per pipeline stage (decompose, retrieve, filter, generate), chunks retrieved/filtered counts, final answer, sources, total time, and which profile was used. + +--- + +## Current State + +### What Exists + +**LLM Pipeline** (3 calls, prompt templates hardcoded in service files): + +| Call | Service | File:Line | Current Prompt Template | Temp | Placeholders | +|------|---------|-----------|------------------------|------|--------------| +| 1 | `QueryDecomposer` | `services/query_decomposer.py:54-59` | `"Given this question: '{question}'\n\nBreak it down into 2-5 simplified sub-questions..."` | default (0.7) | `{question}` | +| 2 | `RelevanceFilter` | `services/relevance_filter.py:36-39` | `"Given question '{question}' and these document chunks, rate each 0-10 for relevance. 
Return JSON array of scores.\n{chunks_string}"` | 0.0 | `{question}`, `{chunks}` | +| 3 | `RAGService` | `services/rag.py:108-117` | `"Question: {question}\n\nAnswer the question using ONLY these document chunks...bullet points...cite sources...\n\nDocument chunks:\n{context}\n\nAnswer:"` | 0.3 | `{question}`, `{context}` | + +- `LLMClient.complete(prompt, temperature, step_name)` — single method, sends prompt as `[{"role": "user", "content": prompt}]` +- All 3 prompts are f-strings built inline in the service methods — no template abstraction exists +- The `step_name` parameter is only used for log labels + +**Data Storage:** +- **No SQL database exists.** ChromaDB is the only persistent store (vector database). +- Config is `.env`-driven via `pydantic-settings.BaseSettings` (flat key-value, not user-editable at runtime). +- Logging exists (RotatingFileHandler to `backend/app/log/backend.log`) — timing data is logged but never persisted. + +**Frontend:** +- 3 pages: `LTTPage` (/), `RAGDatabasePage` (/rag-database), `PdfViewerPage` (/pdf-viewer) +- NavBar has "LTT" and "RAG Database" links +- No history page, no settings/configuration page +- No shadcn/ui — all components are custom Tailwind + +**Query Pipeline (SSE streaming)**: +``` +POST /api/v1/query + → QueryDecomposer.decompose() [LLM Call 1, timing logged only] + → RAGService.retrieve() [ChromaDB, no timing capture] + → RelevanceFilter.filter() [LLM Call 2, timing logged only] + → RAGService.generate_response() [LLM Call 3, timing logged only] + → SSE: completed event with answer + sources +``` + +### What's Missing (Gaps This Plan Fills) + +- No way for users to customize LLM prompts +- No persistence of query history — all queries are ephemeral +- No record of how long each pipeline stage takes +- No way to review past queries and answers +- No user-facing configuration page of any kind +- Hardcoded prompt templates can't be tuned without changing source code + +--- + +## Feature 1: 
System Prompt Configuration (Full Template Editing) + +### 1.1 Overview + +Users edit the **complete prompt template** for each of the 3 LLM calls. Templates contain placeholder variables (e.g., `{question}`, `{chunks}`, `{context}`) that are replaced with actual data at query time. Three profiles (A, B, C) let users save and switch between different prompt sets. + +**Design Decision**: Unlike the original plan (system role prefix + hardcoded user template), users edit the ENTIRE prompt. This gives full control over LLM instructions, output format, and behavior. The page documents exactly which placeholders are available for each step so users know what they can use. + +### 1.2 Database Schema + +**Database**: `backend/data/prompts.db` (SQLite, stdlib `sqlite3`) + +```sql +CREATE TABLE IF NOT EXISTS system_prompt_profiles ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + name TEXT NOT NULL UNIQUE, -- "A" | "B" | "C" + is_active INTEGER DEFAULT 0, -- only ONE row has is_active = 1 + created_at TEXT NOT NULL DEFAULT (datetime('now')), + updated_at TEXT NOT NULL DEFAULT (datetime('now')) +); + +CREATE TABLE IF NOT EXISTS system_prompts ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + profile_id INTEGER NOT NULL, + step_name TEXT NOT NULL, -- "decompose" | "filter" | "generate" + prompt_template TEXT NOT NULL, -- full prompt with {placeholder} variables + created_at TEXT NOT NULL DEFAULT (datetime('now')), + updated_at TEXT NOT NULL DEFAULT (datetime('now')), + FOREIGN KEY (profile_id) REFERENCES system_prompt_profiles(id) ON DELETE CASCADE, + UNIQUE(profile_id, step_name) +); +``` + +**Default seed data** (3 profiles ร— 3 prompts = 9 rows, Profile A active by default): + +All 3 profiles start with the **same defaults** (the current hardcoded prompts). Users customize from there. 
+ +| Profile | Step | Placeholder | Seed Template | +|---------|------|-------------|---------------| +| A | decompose | `{question}` | `"Given this question: '{question}'\n\nBreak it down into 2-5 simplified sub-questions that would help search for relevant information. Each sub-question should be short and focused on one aspect. Return as a JSON array of strings."` | +| A | filter | `{question}`, `{chunks}` | `"Given question '{question}' and these document chunks, rate each 0-10 for relevance.\nReturn JSON array of scores.\n{chunks}\n"` | +| A | generate | `{question}`, `{context}` | `"Question: {question}\n\nAnswer the question using ONLY these document chunks. Do not use any external knowledge. Format your answer as bullet points. Cite your sources inline using the exact bracket labels provided, e.g. [filename, page N]. Place the citation at the end of each relevant point.\n\nDocument chunks:\n{context}\n\nAnswer:"` | +| B | decompose | `{question}` | (same as A) | +| B | filter | `{question}`, `{chunks}` | (same as A) | +| B | generate | `{question}`, `{context}` | (same as A) | +| C | decompose | `{question}` | (same as A) | +| C | filter | `{question}`, `{chunks}` | (same as A) | +| C | generate | `{question}`, `{context}` | (same as A) | + +### 1.3 Available Placeholders (per step) + +These are documented on the frontend edit page so users know exactly what they can insert: + +| Step | Placeholder | What It Contains | Example Replacement | +|------|-------------|------------------|---------------------| +| **Decompose** | `{question}` | The user's original input text | `"What is the NEC4 clause about time extensions?"` | +| **Filter** | `{question}` | The user's original input text | (same) | +| | `{chunks}` | Numbered list of all retrieved chunks: `Chunk 1: \nChunk 2: ...` | `"Chunk 1: The NEC4 clause 61.3 states that time extensions...\nChunk 2: Notice must be given..."` | +| **Generate** | `{question}` | The user's original input text | (same) | +| | 
`{context}` | Formatted chunks with citation labels: `[filename, page N] Source: ...\nSummary: ...\nContent: ...` | `"[NEC4 ACC.pdf, page 3] Source: NEC4 ACC.pdf\nSummary: Discussion of time extension provisions...\nContent: Clause 61.3 states..."` | + +**Placeholder syntax**: `{variable_name}` โ€” must match exactly. Unknown placeholders are left as-is (not replaced). If a user removes a required placeholder (e.g., `{question}`), the LLM won't see the question โ€” the UI warns but doesn't block. + +### 1.4 Backend Architecture + +#### New Files + +| File | Purpose | +|------|---------| +| `backend/app/core/sqlite_db.py` | SQLite connection factory (shared by prompts + history) | +| `backend/app/services/prompt_service.py` | CRUD for prompt profiles and templates; template formatting | +| `backend/app/routers/prompts.py` | REST API endpoints for prompt management | +| `backend/app/models/prompts.py` | Pydantic schemas for prompt request/response | + +#### Modified Files + +| File | Change | +|------|--------| +| `backend/app/core/config.py` | Add `prompts_db_path` and `history_db_path` | +| `backend/app/core/dependencies.py` | Add DI factories: `get_prompt_service()` | +| `backend/app/main.py` | Register `prompts` router; startup: create tables + seed 3 default profiles | +| `backend/app/services/query_decomposer.py` | `decompose()` fetches template from prompt service, formats with `{question}`, sends to LLM | +| `backend/app/services/relevance_filter.py` | `filter()` fetches template from prompt service, formats with `{question}` and `{chunks}`, sends to LLM | +| `backend/app/services/rag.py` | `generate_response()` fetches template from prompt service, formats with `{question}` and `{context}`, sends to LLM | +| `backend/app/routers/query.py` | Pass `PromptService` to pipeline; record active profile name for history | + +#### How Template Formatting Works + +Each service method changes from building a hardcoded prompt to fetching and formatting a template: + 
+**Before** (query_decomposer.py): +```python +prompt = ( + f"Given this question: '{question}'\n\n" + f"Break it down into 2-5 simplified sub-questions..." +) +response = await self.llm_client.complete(prompt, step_name="QueryDecomposer") +``` + +**After** (query_decomposer.py): +```python +template = self.prompt_service.get_prompt_template(step="decompose") +prompt = template.replace("{question}", question) +response = await self.llm_client.complete(prompt, step_name="QueryDecomposer") +``` + +**`PromptService.get_prompt_template()`** fetches the template for the currently active profile + given step. Uses Python `str.replace()` for placeholder substitution — simple, predictable, no `str.format()` edge cases with curly braces in user text. Note that chained `str.replace()` calls are order-sensitive: if the substituted question text itself contains a later placeholder such as `{chunks}` or `{context}`, that text will be substituted as well. + +**Note**: `LLMClient.complete()` does NOT change — no `system_prompt` parameter is added. Templates remain single user-role messages, same as today. The only difference is the prompt text comes from the DB instead of being hardcoded. + +#### API Endpoints (5 total — fixed 3 profiles, no create/delete) + +| Method | Path | Description | +|--------|------|-------------| +| `GET` | `/api/v1/prompts/profiles` | List all 3 profiles with active status: `[{name: "A", is_active: true}, ...]` | +| `PUT` | `/api/v1/prompts/profiles/{name}/activate` | Activate a profile by name (e.g., `PUT /profiles/B/activate`). Validates name is A/B/C. | +| `GET` | `/api/v1/prompts/profiles/{name}` | Get all 3 prompt templates for a profile | +| `PUT` | `/api/v1/prompts/profiles/{name}/{step}` | Update a single prompt template. Validates step is decompose/filter/generate. 
| +| `PUT` | `/api/v1/prompts/profiles/{name}/all` | Batch update all 3 prompt templates for a profile | + +**Why fixed 3 profiles (no create/delete)**: +- Simplest mental model: 3 slots, name them A/B/C +- No duplicate name conflicts, no "delete last profile" edge case +- "Reset to Defaults" restores the seed template for a profile + +### 1.5 Frontend Design + +**New page**: `/system-prompts` +**New NavBar link**: "System Prompts" + +``` +โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” +โ”‚ System Prompts โ”‚ +โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค +โ”‚ โ”‚ +โ”‚ Active Profile: [A โ–ผ] [Set Active] โ”‚ +โ”‚ โ”‚ +โ”‚ โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”‚ +โ”‚ โ”‚ โ— Profile A (active) [Edit] โ”‚ โ”‚ +โ”‚ โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค โ”‚ +โ”‚ โ”‚ โ—‹ Profile B [Edit] โ”‚ โ”‚ +โ”‚ โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค โ”‚ +โ”‚ โ”‚ โ—‹ Profile C [Edit] โ”‚ โ”‚ +โ”‚ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ”‚ +โ”‚ โ”‚ +โ”‚ โ”€โ”€ Editing Profile A โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”‚ +โ”‚ โ”‚ +โ”‚ Available placeholders: โ”‚ +โ”‚ 
โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”‚ +โ”‚ โ”‚ {question} โ€” The user's input question โ”‚ โ”‚ +โ”‚ โ”‚ {chunks} โ€” Retrieved document chunks (filter) โ”‚ โ”‚ +โ”‚ โ”‚ {context} โ€” Formatted chunks with citations โ”‚ โ”‚ +โ”‚ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ”‚ +โ”‚ โ”‚ +โ”‚ Step 1: Query Decomposition โ”‚ +โ”‚ Placeholders: {question} โ”‚ +โ”‚ โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”‚ +โ”‚ โ”‚ Given this question: '{question}' โ”‚ โ”‚ +โ”‚ โ”‚ โ”‚ โ”‚ +โ”‚ โ”‚ Break it down into 2-5 simplified sub-questions โ”‚ โ”‚ +โ”‚ โ”‚ that would help search for relevant information. โ”‚ โ”‚ +โ”‚ โ”‚ Each sub-question should be short and focused on โ”‚ โ”‚ +โ”‚ โ”‚ one aspect. Return as a JSON array of strings. โ”‚ โ”‚ +โ”‚ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ”‚ +โ”‚ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”‚ +โ”‚ Step 2: Relevance Filtering โ”‚ +โ”‚ Placeholders: {question}, {chunks} โ”‚ +โ”‚ โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”‚ +โ”‚ โ”‚ Given question '{question}' and these document โ”‚ โ”‚ +โ”‚ โ”‚ chunks, rate each 0-10 for relevance. โ”‚ โ”‚ +โ”‚ โ”‚ Return JSON array of scores. 
โ”‚ โ”‚ +โ”‚ โ”‚ {chunks} โ”‚ โ”‚ +โ”‚ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ”‚ +โ”‚ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”‚ +โ”‚ Step 3: Response Generation โ”‚ +โ”‚ Placeholders: {question}, {context} โ”‚ +โ”‚ โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”‚ +โ”‚ โ”‚ Question: {question} โ”‚ โ”‚ +โ”‚ โ”‚ โ”‚ โ”‚ +โ”‚ โ”‚ Answer the question using ONLY these document โ”‚ โ”‚ +โ”‚ โ”‚ chunks. Do not use any external knowledge. โ”‚ โ”‚ +โ”‚ โ”‚ Format your answer as bullet points. โ”‚ โ”‚ +โ”‚ โ”‚ Cite your sources inline... โ”‚ โ”‚ +โ”‚ โ”‚ โ”‚ โ”‚ +โ”‚ โ”‚ Document chunks: โ”‚ โ”‚ +โ”‚ โ”‚ {context} โ”‚ โ”‚ +โ”‚ โ”‚ โ”‚ โ”‚ +โ”‚ โ”‚ Answer: โ”‚ โ”‚ +โ”‚ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ”‚ +โ”‚ โ”‚ +โ”‚ [Save Changes] [Reset All to Defaults] [Cancel] โ”‚ +โ”‚ โ”‚ +โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ +``` + +**Component tree**: +``` +SystemPromptsPage +โ”œโ”€โ”€ ProfileSelector (dropdown A/B/C + "Set Active" button) +โ”œโ”€โ”€ ProfileList (3 cards, active indicator) +โ”‚ โ””โ”€โ”€ ProfileCard ร— 3 (name, active indicator, Edit button) +โ”œโ”€โ”€ PlaceholderDocs (info box showing available placeholders per step) +โ””โ”€โ”€ PromptEditor (shown when editing a profile) + โ”œโ”€โ”€ PromptTextArea ร— 3 (labeled with step name + available placeholders) + โ”‚ โ””โ”€โ”€ Per-step reset icon (โ†บ) next to each textarea label + โ””โ”€โ”€ 
ActionBar (Save, Reset All to Defaults, Cancel) +``` + +**Placeholder documentation in UI**: The page shows an "Available Placeholders" info box listing all placeholder variables and what they expand to. Each textarea has a subtle label showing which placeholders are valid for that step (e.g., "Placeholders: `{question}`, `{chunks}`"). Unknown placeholders in the template are left as-is by the backend — the UI shows a soft warning if the template references an unknown placeholder, but doesn't block saving. + +**API hooks** (new in `lib/queries.tsx`): +```typescript +usePromptProfiles() // useQuery: GET /prompts/profiles +usePromptProfile(name) // useQuery: GET /prompts/profiles/{name} +useActivateProfile(name) // useMutation: PUT /prompts/profiles/{name}/activate +useUpdatePrompt(name, step) // useMutation: PUT /prompts/profiles/{name}/{step} +useUpdateAllPrompts(name) // useMutation: PUT /prompts/profiles/{name}/all +``` + +**Edge cases handled**: +- Empty prompt template: allowed (LLM call proceeds with empty prompt — LLM will likely error or return nothing) +- Removed `{question}` placeholder: soft warning shown; LLM won't see the question — user's choice +- Unknown placeholder in template (e.g., `{foo}`): left as-is, UI shows warning badge +- Very long templates: textarea with vertical scroll, character count +- Unsaved changes: warn before navigating away +- Loading state: skeleton cards +- Error state: red error banner with retry + +### 1.6 Acceptance Criteria +- [ ] `/system-prompts` page accessible via NavBar link +- [ ] 3 profiles (A/B/C) shown with active indicator (● / ○) +- [ ] "Set Active" switches which profile is used for queries +- [ ] Editing a profile shows 3 labeled textareas pre-filled with current templates +- [ ] Each textarea shows its available placeholders +- [ ] "Save Changes" persists templates to DB +- [ ] Per-step reset icon (↺) restores the seed template for that individual step +- [ ] "Reset All to Defaults" restores all 3 
templates for the profile at once +- [ ] "Cancel" reverts unsaved edits +- [ ] Changing a template affects the NEXT query (fetched fresh each time) +- [ ] Placeholder docs visible on the page +- [ ] `pytest` backend tests pass (new + existing) +- [ ] `npm test` frontend tests pass (new + existing) + +--- + +## Feature 2: Query History + +### 2.1 Overview + +Every query submitted through the LTT page is recorded in a history database with detailed timing per pipeline stage. Users can browse past queries, see timing breakdowns, and review answers. + +### 2.2 Database Schema + +**Database**: `backend/data/history.db` (SQLite, separate from prompts.db) + +```sql +CREATE TABLE IF NOT EXISTS query_history ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + input_text TEXT NOT NULL, -- original user input + extracted_questions TEXT DEFAULT NULL, -- JSON array of sub-questions + decomposer_time_ms INTEGER DEFAULT 0, -- LLM Call 1 duration + retriever_time_ms INTEGER DEFAULT 0, -- ChromaDB retrieval duration + chunks_retrieved INTEGER DEFAULT 0, -- chunks from ChromaDB + filter_time_ms INTEGER DEFAULT 0, -- LLM Call 2 duration + chunks_filtered INTEGER DEFAULT 0, -- chunks after relevance filtering + generator_time_ms INTEGER DEFAULT 0, -- LLM Call 3 duration + total_time_ms INTEGER DEFAULT 0, -- input received โ†’ final response sent + final_answer TEXT DEFAULT NULL, -- full RAG answer text + sources TEXT DEFAULT NULL, -- JSON array of SourceMetadata + profile_used TEXT DEFAULT NULL, -- "A", "B", or "C" + created_at TEXT NOT NULL DEFAULT (datetime('now')) +); + +CREATE INDEX IF NOT EXISTS idx_query_history_created_at ON query_history(created_at DESC); +``` + +### 2.3 Backend Architecture + +#### New Files + +| File | Purpose | +|------|---------| +| `backend/app/services/history_service.py` | CRUD for query history records | +| `backend/app/routers/history.py` | REST API endpoints for history browsing | +| `backend/app/models/history.py` | Pydantic schemas for history 
request/response | + +#### Modified Files + +| File | Change | +|------|--------| +| `backend/app/core/sqlite_db.py` | Add `get_prompts_db()` and `get_history_db()` connection factories | +| `backend/app/core/config.py` | Add `prompts_db_path` and `history_db_path` | +| `backend/app/core/dependencies.py` | Add `get_history_service()` | +| `backend/app/main.py` | Register `history` router; startup: create history table | +| `backend/app/routers/query.py` | Wrap pipeline in `time.perf_counter()`; record history via `asyncio.create_task()` | + +#### Timing Capture (in `_query_stream()`) + +```python +async def _query_stream(request: QueryRequest): + overall_start = time.perf_counter() + + # Fetch prompt templates for active profile + decompose_template = prompt_service.get_prompt_template("decompose") + filter_template = prompt_service.get_prompt_template("filter") + generate_template = prompt_service.get_prompt_template("generate") + active_profile = prompt_service.get_active_profile_name() # "A", "B", or "C" + + # Stage 1: Decompose + stage_start = time.perf_counter() + prompt = decompose_template.replace("{question}", question) + response = await llm_client.complete(prompt, step_name="QueryDecomposer") + decomposer_time_ms = int((time.perf_counter() - stage_start) * 1000) + questions = parse_questions(response) + yield sse_event("decomposed", ...) + + # Stage 2: Retrieve + stage_start = time.perf_counter() + chunks, metadata = await rag.retrieve(question_texts=questions, ...) + retriever_time_ms = int((time.perf_counter() - stage_start) * 1000) + chunks_retrieved = len(chunks) + yield sse_event("retrieving", ...) 
+ + # Stage 3: Filter + stage_start = time.perf_counter() + prompt = filter_template.replace("{question}", question) + prompt = prompt.replace("{chunks}", format_chunks(chunks)) + response = await llm_client.complete(prompt, temperature=0.0, step_name="RelevanceFilter") + filter_time_ms = int((time.perf_counter() - stage_start) * 1000) + filtered = parse_scores(response, chunks, threshold) + chunks_filtered = len(filtered) + yield sse_event("filtering", ...) + + # Stage 4: Generate + stage_start = time.perf_counter() + prompt = generate_template.replace("{question}", question) + prompt = prompt.replace("{context}", format_context(filtered, metadata)) + answer = await llm_client.complete(prompt, temperature=0.3, step_name="ResponseGeneration") + generator_time_ms = int((time.perf_counter() - stage_start) * 1000) + + total_time_ms = int((time.perf_counter() - overall_start) * 1000) + + # Record history (fire-and-forget) + asyncio.create_task(history_service.record(QueryHistoryRecord( + input_text=request.question, + extracted_questions=json.dumps(questions), + decomposer_time_ms=decomposer_time_ms, + retriever_time_ms=retriever_time_ms, + chunks_retrieved=chunks_retrieved, + filter_time_ms=filter_time_ms, + chunks_filtered=chunks_filtered, + generator_time_ms=generator_time_ms, + total_time_ms=total_time_ms, + final_answer=answer, + sources=json.dumps([s.dict() for s in sources]), + profile_used=active_profile, + ))) + + yield sse_event("completed", ...) +``` + +**Fire-and-forget**: `asyncio.create_task()` ensures history recording never blocks the SSE stream. If recording fails, the query completes normally — history is best-effort. Keep a reference to the created task until it completes (e.g., store it in a set and discard it in a done callback): the event loop holds only weak references to tasks, so an unreferenced task can be garbage-collected mid-execution. + +#### API Endpoints + +| Method | Path | Description | +|--------|------|-------------| +| `GET` | `/api/v1/history` | List query history (paginated, newest first). 
Query params: `limit` (default 50), `offset` (default 0) | +| `GET` | `/api/v1/history/{query_id}` | Get full detail for a single query | +| `DELETE` | `/api/v1/history/{query_id}` | Delete a history record | +| `DELETE` | `/api/v1/history` | Clear all history | +| `GET` | `/api/v1/history/stats` | Aggregate stats: total queries, avg time, avg chunks, most used profile | + +#### Response Schemas + +```python +class QueryHistorySummary(BaseModel): + id: int + input_text: str # truncated to 100 chars + total_time_ms: int + chunks_retrieved: int + chunks_filtered: int + profile_used: str | None # "A", "B", or "C" + created_at: str + +class QueryHistoryDetail(BaseModel): + id: int + input_text: str # full text + extracted_questions: list[str] + decomposer_time_ms: int + retriever_time_ms: int + filter_time_ms: int + generator_time_ms: int + total_time_ms: int + chunks_retrieved: int + chunks_filtered: int + final_answer: str + sources: list[SourceMetadata] + profile_used: str | None + created_at: str + +class QueryHistoryList(BaseModel): + queries: list[QueryHistorySummary] + total: int + limit: int + offset: int +``` + +### 2.4 Frontend Design + +**New page**: `/history` +**New NavBar link**: "History" + +``` +โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” +โ”‚ Query History Total: 42 queries โ”‚ +โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค +โ”‚ โ”‚ +โ”‚ โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”‚ +โ”‚ โ”‚ ๐Ÿ“Š Stats โ”‚ โ”‚ +โ”‚ โ”‚ Avg time: 3.2s ยท Avg chunks: 8.5 โ†’ 4.2 filtered โ”‚ โ”‚ +โ”‚ โ”‚ Most used: Profile A (35 queries) โ”‚ โ”‚ +โ”‚ 
โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ”‚ +โ”‚ โ”‚ +โ”‚ โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”‚ +โ”‚ โ”‚ #42 ยท 2026-04-25 14:32 ยท 3.8s ยท Profile A โ”‚ โ”‚ +โ”‚ โ”‚ "What is the NEC4 clause about time extensions?" โ”‚ โ”‚ +โ”‚ โ”‚ 8 chunks โ†’ 4 filtered ยท [Expand โ–ผ] โ”‚ โ”‚ +โ”‚ โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค โ”‚ +โ”‚ โ”‚ #41 ยท 2026-04-25 14:15 ยท 2.1s ยท Profile B โ”‚ โ”‚ +โ”‚ โ”‚ "How does arbitration work under the contract?" โ”‚ โ”‚ +โ”‚ โ”‚ 10 chunks โ†’ 3 filtered ยท [Expand โ–ผ] โ”‚ โ”‚ +โ”‚ โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค โ”‚ +โ”‚ โ”‚ #40 ยท 2026-04-25 13:50 ยท 4.5s ยท Profile A โ”‚ โ”‚ +โ”‚ โ”‚ "Explain the payment mechanism and valuation..." 
โ”‚ โ”‚ +โ”‚ โ”‚ 12 chunks โ†’ 6 filtered ยท [Expand โ–ผ] โ”‚ โ”‚ +โ”‚ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ”‚ +โ”‚ โ”‚ +โ”‚ [Load More] [Clear All] โ”‚ +โ”‚ โ”‚ +โ”‚ โ”€โ”€ Expanded: #42 โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”‚ +โ”‚ โ”‚ +โ”‚ โฑ Pipeline Timing: โ”‚ +โ”‚ โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”‚ +โ”‚ โ”‚ Decompose โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–‘โ–‘โ–‘โ–‘โ–‘โ–‘โ–‘โ–‘โ–‘ 0.8s โ”‚ โ”‚ +โ”‚ โ”‚ Retrieve โ–ˆโ–ˆโ–‘โ–‘โ–‘โ–‘โ–‘โ–‘โ–‘โ–‘โ–‘โ–‘โ–‘โ–‘โ–‘โ–‘โ–‘ 0.2s (8 chunks) โ”‚ โ”‚ +โ”‚ โ”‚ Filter โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–‘โ–‘โ–‘โ–‘โ–‘โ–‘โ–‘โ–‘ 1.1s (4 kept) โ”‚ โ”‚ +โ”‚ โ”‚ Generate โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ 1.7s โ”‚ โ”‚ +โ”‚ โ”‚ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ โ”‚ โ”‚ +โ”‚ โ”‚ Total โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ 3.8s โ”‚ โ”‚ +โ”‚ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ”‚ +โ”‚ โ”‚ +โ”‚ ๐Ÿ“ Extracted Questions: โ”‚ +โ”‚ 1. What are the time extension provisions? โ”‚ +โ”‚ 2. What notice is required for time extensions? โ”‚ +โ”‚ 3. How is extended time calculated under NEC4? โ”‚ +โ”‚ โ”‚ +โ”‚ ๐Ÿ’ฌ Answer: โ”‚ +โ”‚ โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”‚ +โ”‚ โ”‚ โ€ข The time extension provisions are outlined in โ”‚ โ”‚ +โ”‚ โ”‚ clause 61.3 [NEC4 ACC.pdf, page 3] โ”‚ โ”‚ +โ”‚ โ”‚ โ€ข Notice must be given within 8 weeks [NEC4 ACC... โ”‚ โ”‚ +โ”‚ โ”‚ ... 
โ”‚ โ”‚ +โ”‚ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ”‚ +โ”‚ โ”‚ +โ”‚ ๐Ÿ“Ž Sources (4): ยท NEC4 ACC.pdf, page 3 ยท ... โ”‚ +โ”‚ ๐Ÿ“‹ Profile used: A โ”‚ +โ”‚ โ”‚ +โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ +``` + +**Component tree**: +``` +HistoryPage +โ”œโ”€โ”€ HistoryStats (summary bar: total queries, avg time, avg chunks, most used profile) +โ”œโ”€โ”€ HistoryList (scrollable list) +โ”‚ โ””โ”€โ”€ HistoryCard ร— N (collapsed: date, time, question preview, profile badge) +โ”‚ โ””โ”€โ”€ HistoryDetail (expanded: timing bars, questions, answer, sources) +โ”œโ”€โ”€ LoadMoreButton +โ””โ”€โ”€ ClearAllButton (with confirmation dialog) +``` + +**Timing bars**: Pure CSS โ€” `
`. Color-coded: Decompose (blue-400), Retrieve (green-400), Filter (amber-400), Generate (purple-400). + +**API hooks**: +```typescript +useQueryHistory(limit, offset) // useQuery: GET /history +useQueryHistoryDetail(id) // useQuery: GET /history/{id} +useDeleteHistoryRecord(id) // useMutation: DELETE /history/{id} +useClearHistory() // useMutation: DELETE /history +useHistoryStats() // useQuery: GET /history/stats +``` + +### 2.5 Acceptance Criteria +- [ ] Every query creates a history record with all timing and data fields +- [ ] `GET /api/v1/history?limit=20&offset=0` returns paginated results (newest first) +- [ ] `GET /api/v1/history/{id}` returns full detail with parsed JSON fields +- [ ] `DELETE /api/v1/history/{id}` removes one record +- [ ] `DELETE /api/v1/history` clears all records +- [ ] `GET /api/v1/history/stats` returns aggregate statistics +- [ ] History recording is fire-and-forget โ€” never blocks query response +- [ ] History page accessible via NavBar link +- [ ] Timing bars accurately represent stage proportions +- [ ] Expanded detail shows answer rendered as markdown with citation links +- [ ] Sources show clickable links to PDF viewer +- [ ] All states: loading, empty, error, success +- [ ] Profile used is shown for each query +- [ ] All backend + frontend tests pass + +--- + +## Sub-Phase Breakdown + +| Sub-Phase | Feature | Difficulty | Backend | Frontend | Depends On | +|-----------|---------|-----------|---------|----------|------------| +| 3.1 | SQLite Infrastructure | โญโญ Medium | sqlite_db.py (dual-DB factories), config, table creation, seed data | None | โ€” | +| 3.2 | Prompt Backend | โญโญโญ Hard | prompt_service.py, prompts router, models, template formatting | None | 3.1 | +| 3.3 | Prompt Frontend Page | โญโญ Medium | None | SystemPromptsPage, ProfileList, PromptEditor, placeholder docs | 3.2 | +| 3.4 | Service Refactoring (Template Injection) | โญโญโญ Hard | query_decomposer, relevance_filter, rag.py, query.py | None | 
3.2 | +| 3.5 | History Backend | โญโญโญ Hard | history_service.py, history router, models, query.py timing capture | None | 3.1, 3.4 | +| 3.6 | History Frontend Page | โญโญ Medium | None | HistoryPage, HistoryList, HistoryDetail, timing bars | 3.5 | + +### Dependency Graph + +``` +3.1 (SQLite Infra) + โ”‚ + โ”œโ”€โ”€โ–บ 3.2 (Prompt Backend) + โ”‚ โ”‚ + โ”‚ โ”œโ”€โ”€โ–บ 3.3 (Prompt Frontend) โ† parallel with 3.4 + โ”‚ โ”‚ + โ”‚ โ””โ”€โ”€โ–บ 3.4 (Service Refactoring) + โ”‚ โ”‚ + โ”‚ โ””โ”€โ”€โ–บ 3.5 (History Backend) + โ”‚ โ”‚ + โ”‚ โ””โ”€โ”€โ–บ 3.6 (History Frontend) +``` + +- **3.1** is the foundation +- **3.2** blocks 3.3 and 3.4 (both need the prompt service) +- **3.3 and 3.4 run in PARALLEL** after 3.2 +- **3.5** needs 3.1 (history DB) AND 3.4 (refactored pipeline for timing capture) +- **3.6** needs 3.5 (history API) + +--- + +## Sub-Phase 3.1: SQLite Infrastructure โญโญ Medium + +### Objective +Introduce SQLite with two separate databases: `prompts.db` for prompt templates and `history.db` for query history. Create connection factories, table schemas, and default seed data. + +### Database Technology + +**Decision**: `sqlite3` stdlib โ€” zero new dependencies. Lightweight operations, adequate for single-user desktop app. 
+ +### Changes Required + +| File | Change | +|------|--------| +| `backend/app/core/sqlite_db.py` | **NEW** โ€” `get_prompts_db()` and `get_history_db()` connection factories; `init_prompts_db()`, `init_history_db()` table creation; `seed_default_profiles()` | +| `backend/app/core/config.py` | Add `prompts_db_path: str = "./data/prompts.db"` and `history_db_path: str = "./data/history.db"` | +| `backend/app/main.py` | Startup event: create `data/` dir, init both DBs, seed default profiles | +| `backend/.env.example` | Add `PROMPTS_DB_PATH` and `HISTORY_DB_PATH` | +| `backend/.gitignore` | Add `data/` directory | + +**`sqlite_db.py` design**: +```python +import sqlite3, os +from app.core.config import get_settings + +def _get_db(db_path: str) -> sqlite3.Connection: + """Shared connection factory (caller must close).""" + os.makedirs(os.path.dirname(db_path), exist_ok=True) + conn = sqlite3.connect(db_path) + conn.row_factory = sqlite3.Row + conn.execute("PRAGMA journal_mode=WAL") + conn.execute("PRAGMA foreign_keys=ON") + return conn + +def get_prompts_db() -> sqlite3.Connection: + return _get_db(get_settings().prompts_db_path) + +def get_history_db() -> sqlite3.Connection: + return _get_db(get_settings().history_db_path) +``` + +### Acceptance Criteria +- [ ] `backend/data/prompts.db` created on first startup with profile + prompt tables +- [ ] `backend/data/history.db` created on first startup with query_history table + index +- [ ] 3 profiles (A/B/C) seeded with current hardcoded prompts as default templates +- [ ] Profile A active by default +- [ ] `data/` directory gitignored +- [ ] Both DB paths configurable via `.env` +- [ ] Existing `pytest` tests still pass + +--- + +## Sub-Phase 3.2: Prompt Backend โญโญโญ Hard + +### Objective +Create the prompt service layer: Pydantic models, CRUD service, template formatting, REST API endpoints. 
+ +### Changes Required + +| File | Change | +|------|--------| +| `backend/app/models/prompts.py` | **NEW** — `PromptProfile`, `PromptSetResponse` (3 prompts), `PromptUpdateRequest` | +| `backend/app/services/prompt_service.py` | **NEW** — `PromptService`: get_profile, list_profiles, activate, get_template, update_prompt, update_all, format_prompt | +| `backend/app/routers/prompts.py` | **NEW** — 6 endpoints on `/api/v1/prompts` | +| `backend/app/core/dependencies.py` | Add `get_prompt_service()` | +| `backend/app/main.py` | Register `prompts` router | + +**`PromptService` key methods**: +```python +class PromptService: + def get_active_profile_name(self) -> str: + """Return "A", "B", or "C" — which profile is active.""" + + def get_prompt_template(self, step: str) -> str: + """Get the template for the active profile + given step ("decompose"/"filter"/"generate").""" + + def list_profiles(self) -> list[dict]: + """Return [{name: "A", is_active: true}, ...].""" + + def activate_profile(self, name: str) -> None: + """Set is_active=1 for name, is_active=0 for others. Validates name in {A, B, C}.""" + + def get_profile_prompts(self, name: str) -> dict: + """Return {"decompose": "...", "filter": "...", "generate": "..."}.""" + + def update_prompt(self, name: str, step: str, template: str) -> None: + """Update single template. Validates step in {decompose, filter, generate}.""" + + def update_all_prompts(self, name: str, prompts: dict) -> None: + """Batch update all 3 templates.""" + + def reset_to_defaults(self, name: str, step: str | None = None) -> None: + """Restore seed template. If step is None, reset all 3 steps. 
Otherwise reset only that step.""" +``` + +### Acceptance Criteria +- [ ] `GET /api/v1/prompts/profiles` returns A/B/C with active status +- [ ] `PUT /api/v1/prompts/profiles/B/activate` switches active profile (only one at a time) +- [ ] `PUT /api/v1/prompts/profiles/A/decompose` updates template and persists across restarts +- [ ] `PUT /api/v1/prompts/profiles/A/all` batch-updates all 3 templates +- [ ] Invalid profile name (e.g., "D") returns 400 +- [ ] Invalid step name (e.g., "summarize") returns 400 +- [ ] Active profile is fetched fresh per query (no caching) +- [ ] All tests pass: `test_phase3_prompt_service.py`, `test_phase3_prompts_router.py` + +--- + +## Sub-Phase 3.3: Prompt Frontend Page โญโญ Medium + +### Objective +Build the System Prompts page at `/system-prompts` with profile switching and full template editing. + +### Changes Required + +| File | Change | +|------|--------| +| `frontend/src/pages/SystemPromptsPage.tsx` | **NEW** | +| `frontend/src/components/ProfileList.tsx` | **NEW** โ€” 3 cards (A/B/C) | +| `frontend/src/components/PromptEditor.tsx` | **NEW** โ€” 3 textareas + placeholder docs + save/reset/cancel | +| `frontend/src/components/PlaceholderDocs.tsx` | **NEW** โ€” info box listing available placeholders | +| `frontend/src/lib/api.ts` | Add 5 prompt API functions | +| `frontend/src/lib/queries.tsx` | Add TanStack Query hooks | +| `frontend/src/types/index.ts` | Add prompt-related types | +| `frontend/src/App.tsx` | Add `/system-prompts` route | +| `frontend/src/components/NavBar.tsx` | Add "System Prompts" nav link | + +### Acceptance Criteria +- [ ] Page accessible via NavBar +- [ ] 3 profiles shown: A (active โ—), B (โ—‹), C (โ—‹) +- [ ] "Set Active" switches active profile +- [ ] Editing a profile shows 3 labeled textareas with current templates +- [ ] Each textarea labeled with available placeholders +- [ ] Placeholder docs info box visible +- [ ] "Save Changes" persists; "Reset to Defaults" restores seed template; "Cancel" 
reverts +- [ ] Soft warning if template references unknown placeholder +- [ ] All states: loading, error, success +- [ ] Frontend tests pass + +--- + +## Sub-Phase 3.4: Service Refactoring (Template Injection) โญโญโญ Hard + +### Objective +Refactor all 3 LLM-calling services to fetch prompt templates from the DB instead of using hardcoded strings. Wire the query router to pass `PromptService` through the pipeline. + +### Changes Required + +| File | Change | +|------|--------| +| `backend/app/services/query_decomposer.py` | Accept `PromptService`; `decompose()` fetches template, replaces `{question}`, calls LLM | +| `backend/app/services/relevance_filter.py` | Accept `PromptService`; `filter()` fetches template, replaces `{question}` and `{chunks}`, calls LLM | +| `backend/app/services/rag.py` | Accept `PromptService`; `generate_response()` fetches template, replaces `{question}` and `{context}`, calls LLM | +| `backend/app/routers/query.py` | Instantiate `PromptService` at pipeline start; pass to all services; capture `active_profile_name` | +| `backend/app/test/conftest.py` | Add `mock_prompt_service` fixture | +| `backend/app/test/test_phase1_query_decomposer.py` | Update tests for PromptService dependency | +| `backend/app/test/test_phase1_relevance_filter.py` | Update tests | +| `backend/app/test/test_phase1_rag_service.py` | Update tests | + +**Before/After per service**: + +| Service | Before (hardcoded) | After (template from DB) | +|---------|-------------------|-------------------------| +| `QueryDecomposer.decompose()` | `f"Given this question: '{question}'\n\nBreak it down..."` | `template.replace("{question}", question)` | +| `RelevanceFilter._build_prompt()` | `f"Given question '{question}'...{chunks_formatted}"` | `template.replace("{question}", question).replace("{chunks}", chunks_formatted)` | +| `RAGService.generate_response()` | `f"Question: {question}\n\nAnswer...{context}\n\nAnswer:"` | `template.replace("{question}", 
question).replace("{context}", context)` | + +**`LLMClient.complete()` โ€” NO CHANGES.** Templates remain single user-role messages. + +### Acceptance Criteria +- [ ] All 3 LLM calls use templates from the active profile in the DB +- [ ] Placeholders correctly replaced: `{question}` โ†’ user input, `{chunks}` โ†’ numbered list, `{context}` โ†’ formatted chunks with citations +- [ ] Switching active profile changes prompts for NEXT query +- [ ] If template is empty string, LLM call proceeds with empty prompt (LLM error is acceptable) +- [ ] All existing tests pass (updated for PromptService dependency) +- [ ] New tests: `test_phase3_prompt_injection.py` + +--- + +## Sub-Phase 3.5: History Backend โญโญโญ Hard + +### Objective +Capture timing and data from every pipeline stage and persist to `history.db`. Expose REST API for browsing. + +### Changes Required + +| File | Change | +|------|--------| +| `backend/app/models/history.py` | **NEW** โ€” `QueryHistoryRecord`, `QueryHistorySummary`, `QueryHistoryDetail`, `QueryHistoryList` | +| `backend/app/services/history_service.py` | **NEW** โ€” `HistoryService`: record, list (paginated), get, delete, clear_all, get_stats | +| `backend/app/routers/history.py` | **NEW** โ€” 5 endpoints on `/api/v1/history` | +| `backend/app/routers/query.py` | Add `time.perf_counter()` around each stage; `asyncio.create_task(history_service.record(...))` at end | +| `backend/app/core/dependencies.py` | Add `get_history_service()` | +| `backend/app/main.py` | Register `history` router | + +**Timing stages captured**: decompose, retrieve, filter, generate, total. 
+ +### Acceptance Criteria +- [ ] Every query creates a history record with all fields +- [ ] All 5 history API endpoints work correctly +- [ ] Pagination: `limit` + `offset`, newest first +- [ ] Stats endpoint: total queries, avg times, avg chunks, most used profile +- [ ] History recording is fire-and-forget (never blocks query) +- [ ] History persists across restarts +- [ ] All tests pass: `test_phase3_history_service.py`, `test_phase3_history_router.py`, `test_phase3_query_history_integration.py` + +--- + +## Sub-Phase 3.6: History Frontend Page โญโญ Medium + +### Objective +Build the History page at `/history` with scrollable list, expandable detail, timing bars, and stats. + +### Changes Required + +| File | Change | +|------|--------| +| `frontend/src/pages/HistoryPage.tsx` | **NEW** | +| `frontend/src/components/HistoryList.tsx` | **NEW** | +| `frontend/src/components/HistoryCard.tsx` | **NEW** โ€” collapsed card + expandable detail | +| `frontend/src/components/TimingBar.tsx` | **NEW** โ€” CSS-width proportional bars | +| `frontend/src/lib/api.ts` | Add 5 history API functions | +| `frontend/src/lib/queries.tsx` | Add TanStack Query hooks | +| `frontend/src/types/index.ts` | Add history types | +| `frontend/src/App.tsx` | Add `/history` route | +| `frontend/src/components/NavBar.tsx` | Add "History" nav link | + +### Acceptance Criteria +- [ ] Page accessible via NavBar +- [ ] Stats bar: total, avg time, avg chunks, most used profile +- [ ] History list: paginated, newest first, shows date/time/duration/input preview/profile badge +- [ ] Expand card: timing bars, extracted questions, full answer (markdown), sources (clickable) +- [ ] "Load More" pagination +- [ ] "Clear All" with confirmation +- [ ] Individual delete with confirmation +- [ ] All states: loading skeleton, empty "No queries yet", error with retry +- [ ] Frontend tests pass + +--- + +## New Dependencies + +**Zero.** `sqlite3` is Python stdlib. All UI is custom Tailwind. 
No new npm or pip packages. + +--- + +## Directory Structure After Package 3 + +``` +legco_reranker/ +├── backend/ +│ ├── app/ +│ │ ├── core/ +│ │ │ ├── config.py # + prompts_db_path, history_db_path +│ │ │ ├── database.py # (unchanged - ChromaDB) +│ │ │ ├── dependencies.py # + get_prompt_service, get_history_service +│ │ │ └── sqlite_db.py # NEW - dual-DB connection factories +│ │ ├── models/ +│ │ │ ├── history.py # NEW +│ │ │ └── prompts.py # NEW +│ │ ├── routers/ +│ │ │ ├── history.py # NEW +│ │ │ ├── prompts.py # NEW +│ │ │ └── query.py # MODIFIED - timing capture + template injection +│ │ ├── services/ +│ │ │ ├── history_service.py # NEW +│ │ │ ├── prompt_service.py # NEW - template storage + formatting +│ │ │ ├── query_decomposer.py # MODIFIED - use PromptService for templates +│ │ │ ├── rag.py # MODIFIED - use PromptService for templates +│ │ │ └── relevance_filter.py # MODIFIED - use PromptService for templates +│ │ ├── test/ +│ │ │ ├── test_phase3_prompt_service.py # NEW +│ │ │ ├── test_phase3_prompts_router.py # NEW +│ │ │ ├── test_phase3_prompt_injection.py # NEW +│ │ │ ├── test_phase3_history_service.py # NEW +│ │ │ ├── test_phase3_history_router.py # NEW +│ │ │ ├── test_phase3_query_history_integration.py # NEW +│ │ │ ├── test_phase1_query_decomposer.py # MODIFIED +│ │ │ ├── test_phase1_relevance_filter.py # MODIFIED +│ │ │ └── test_phase1_rag_service.py # MODIFIED +│ │ └── main.py # MODIFIED - startup init + new routers +│ ├── data/ # NEW (gitignored) +│ │ ├── prompts.db +│ │ └── history.db +│ └── .env.example # + PROMPTS_DB_PATH, HISTORY_DB_PATH +├── frontend/src/ 
+│ ├── components/ +│ │ ├── HistoryCard.tsx # NEW +│ │ ├── HistoryList.tsx # NEW +│ │ ├── NavBar.tsx # MODIFIED - +2 nav links +│ │ ├── PlaceholderDocs.tsx # NEW +│ │ ├── ProfileList.tsx # NEW +│ │ ├── PromptEditor.tsx # NEW +│ │ └── TimingBar.tsx # NEW +│ ├── pages/ +│ │ ├── HistoryPage.tsx # NEW +│ │ └── SystemPromptsPage.tsx # NEW +│ ├── lib/ +│ │ ├── api.ts # MODIFIED - +history +prompts endpoints +│ │ └── queries.tsx # MODIFIED - +history +prompts hooks +│ ├── types/index.ts # MODIFIED - +history +prompts types +│ └── App.tsx # MODIFIED - +2 routes +└── .gitignore # + data/ +``` + +--- + +## Test Plan + +### Backend Tests (New) + +| File | Coverage | Sub-Phase | +|------|----------|-----------| +| `test_phase3_prompt_service.py` | Prompt CRUD, activation, template formatting, edge cases | 3.2 | +| `test_phase3_prompts_router.py` | All 6 HTTP endpoints, error codes, validation | 3.2 | +| `test_phase3_prompt_injection.py` | Templates fetched from DB, placeholders replaced, end-to-end query uses templates | 3.4 | +| `test_phase3_history_service.py` | History CRUD, pagination, stats, edge cases | 3.5 | +| `test_phase3_history_router.py` | All 5 HTTP endpoints, pagination bounds, empty DB | 3.5 | +| `test_phase3_query_history_integration.py` | Full SSE query → history record created with correct data | 3.5 | + +### Backend Tests (Modified) + +| File | Change | Sub-Phase | +|------|--------|-----------| +| `test_phase1_query_decomposer.py` | Add PromptService dependency to test setup | 3.4 | +| `test_phase1_relevance_filter.py` | Add PromptService dependency | 3.4 | +| `test_phase1_rag_service.py` | Add PromptService dependency | 3.4 | +| `conftest.py` | Add `mock_prompt_service` fixture | 3.2 | + +### Frontend Tests (New) + +| File | Coverage | Sub-Phase | +|------|----------|-----------| +| 
`SystemPromptsPage.test.tsx` | Page render, profile list, activation, edit flows | 3.3 | +| `ProfileList.test.tsx` | A/B/C cards, active indicator, edit button | 3.3 | +| `PromptEditor.test.tsx` | 3 textareas, placeholder docs, save/reset/cancel | 3.3 | +| `HistoryPage.test.tsx` | Page render, stats, pagination, clear all | 3.6 | +| `HistoryCard.test.tsx` | Collapsed/expanded states, timing bars, answer, sources | 3.6 | +| `TimingBar.test.tsx` | Proportional widths, zero-time stages, color mapping | 3.6 | + +### Acceptance Tests + +| File | Coverage | Sub-Phase | +|------|----------|-----------| +| `test_acceptance_package3_prompts.py` | Create profile โ†’ edit templates โ†’ activate โ†’ query uses new templates | 3.2-3.4 | +| `test_acceptance_package3_history.py` | Multiple queries โ†’ history shows correct records with timing + profile | 3.5 | + +--- + +## Risks & Mitigations + +| Risk | Impact | Mitigation | +|------|--------|------------| +| User removes `{question}` placeholder โ†’ LLM doesn't see the question | LLM returns irrelevant or empty response | UI shows soft warning; user's choice โ€” they can always reset to defaults | +| `str.replace()` is case-sensitive โ†’ `{Question}` not recognized | Placeholder left as-is in prompt | UI documents exact placeholder names; preview mode could highlight unresolved placeholders | +| `sqlite3` sync calls block async event loop | Slow responses under load | Operations are trivial (single-row lookups). History recording is fire-and-forget. WAL mode for concurrent reads. | +| History DB grows unbounded | Disk usage | Manual cleanup via "Clear All" button. Future: auto-prune config. 
| +| `data/` directory not created on startup | SQLite connection fails | `os.makedirs(dirname, exist_ok=True)` in connection factory | +| User expects `{question}` to work in filter/generate templates | Might add it in wrong context | Placeholder docs on page show exactly which placeholders are valid per step | +| Two separate DB files complicate backups | User might backup one but not the other | Use same `data/` directory โ€” easy to back up as one folder | + +--- + +## Decisions + +| # | Question | Decision | +|---|----------|----------| +| 1 | Template editing scope | **Full prompt template** with `{placeholder}` variables โ€” users edit the entire message sent to LLM | +| 2 | System role vs user role | **User role only** โ€” no system prompt concept. Templates are the full user message (same as current). | +| 3 | Number of profiles | **Fixed 3** (A, B, C) โ€” no create/delete. Simplest mental model. | +| 4 | Database separation | **Two files**: `prompts.db` and `history.db` โ€” independent concerns | +| 5 | Database technology | **sqlite3 stdlib** โ€” zero new dependencies | +| 6 | Placeholder syntax | **`{variable_name}`** with `str.replace()` โ€” simple, predictable. No `str.format()` edge cases. 
| +| 7 | History recording reliability | **Fire-and-forget** (`asyncio.create_task`) โ€” never blocks query response | +| 8 | History data retention | **Manual cleanup only** in Package 3 | +| 9 | Timing capture location | **Inline in query.py** โ€” centralized, one file changes | +| 10 | Frontend timing visualization | **CSS width bars** โ€” no charting library | +| 11 | History pagination | **Offset-based** (`limit` + `offset`) | +| 12 | NavBar order | **LTT ยท RAG Database ยท System Prompts ยท History** | +| 13 | Default seed templates | **All 3 profiles start identical** (current hardcoded prompts) โ€” users customize from a common baseline | +| 14 | Reset button granularity | **Both** โ€” per-step reset icon (โ†บ) on each textarea label, plus "Reset All to Defaults" button in the action bar | + +--- + +## Pre-Implementation Checklist + +Before starting implementation, verify: +- [ ] All existing backend tests pass (`cd backend && pytest app/test/ -v`) +- [ ] All existing frontend tests pass (`cd frontend && npm test`) +- [ ] AGENTS.md updated to reflect current project state (no longer "Greenfield") +- [ ] Plan reviewed and approved by user diff --git a/backend/app/core/dependencies.py b/backend/app/core/dependencies.py index 28b1a9f..ac7f087 100644 --- a/backend/app/core/dependencies.py +++ b/backend/app/core/dependencies.py @@ -22,3 +22,9 @@ def get_rag_service(): from app.services.rag import RAGService llm = get_llm_client() return RAGService(llm_client=llm) + + +def get_prompt_service(): + from app.services.prompt_service import PromptService + settings = get_settings_cached() + return PromptService(db_path=settings.prompts_db_path) diff --git a/backend/app/main.py b/backend/app/main.py index 42cfb5a..2d2ef5c 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -6,7 +6,7 @@ from pathlib import Path from fastapi import FastAPI from fastapi.middleware.cors import CORSMiddleware -from app.routers import ingest, query, documents +from app.routers 
import ingest, query, documents, prompts from app.core.config import get_settings from app.core.sqlite_db import ( get_prompts_db, @@ -52,6 +52,7 @@ app.add_middleware( app.include_router(ingest.router, prefix="/api/v1") app.include_router(query.router, prefix="/api/v1") app.include_router(documents.router, prefix="/api/v1") +app.include_router(prompts.router) _prompts_conn = get_prompts_db() init_prompts_db(_prompts_conn) diff --git a/backend/app/models/prompts.py b/backend/app/models/prompts.py new file mode 100644 index 0000000..e6d6f54 --- /dev/null +++ b/backend/app/models/prompts.py @@ -0,0 +1,29 @@ +"""Pydantic schemas for the prompt-profile management endpoints.""" + +from pydantic import BaseModel + + +class ProfileItem(BaseModel): + name: str + is_active: bool + + +class ProfileListResponse(BaseModel): + profiles: list[ProfileItem] + + +class PromptSetResponse(BaseModel): + profile_name: str + prompts: dict[str, str] + + +class PromptUpdateRequest(BaseModel): + template: str + + +class PromptBatchUpdateRequest(BaseModel): + prompts: dict[str, str] + + +class ResetToDefaultsRequest(BaseModel): + step: str | None = None diff --git a/backend/app/routers/prompts.py b/backend/app/routers/prompts.py new file mode 100644 index 0000000..7d43d21 --- /dev/null +++ b/backend/app/routers/prompts.py @@ -0,0 +1,81 @@ +import logging + +from fastapi import APIRouter, HTTPException + +from app.core.dependencies import get_prompt_service +from app.models.prompts import ( + ProfileListResponse, + ProfileItem, + PromptSetResponse, + PromptUpdateRequest, + PromptBatchUpdateRequest, + ResetToDefaultsRequest, +) + +logger = logging.getLogger(__name__) + +router = APIRouter(prefix="/api/v1/prompts", tags=["prompts"]) + +_VALID_NAMES = {"A", "B", "C"} +_VALID_STEPS = {"decompose", "filter", "generate"} + + +def _ensure_valid_name(name: str) -> None: + if name not in _VALID_NAMES: + raise HTTPException(status_code=400, detail=f"Invalid profile name '{name}'. 
Must be one of A, B, C.") + + +def _ensure_valid_step(step: str) -> None: + if step not in _VALID_STEPS: + raise HTTPException(status_code=400, detail=f"Invalid step '{step}'. Must be one of decompose, filter, generate.") + + +@router.get("/profiles", response_model=ProfileListResponse) +def list_profiles(): + svc = get_prompt_service() + profiles = [ProfileItem(**p) for p in svc.list_profiles()] + return ProfileListResponse(profiles=profiles) + + +@router.get("/profiles/{name}", response_model=PromptSetResponse) +def get_profile_prompts(name: str): + _ensure_valid_name(name) + svc = get_prompt_service() + prompts = svc.get_profile_prompts(name) + return PromptSetResponse(profile_name=name, prompts=prompts) + + +@router.put("/profiles/{name}/activate") +def activate_profile(name: str): + _ensure_valid_name(name) + svc = get_prompt_service() + svc.activate_profile(name) + return {"status": "ok", "active_profile": name} + + +@router.put("/profiles/{name}/all") +def update_all_prompts(name: str, body: PromptBatchUpdateRequest): + _ensure_valid_name(name) + svc = get_prompt_service() + svc.update_all_prompts(name, body.prompts) + return {"status": "ok", "profile": name} + + +@router.put("/profiles/{name}/reset") +def reset_to_defaults(name: str, body: ResetToDefaultsRequest | None = None): + _ensure_valid_name(name) + step = body.step if body else None + if step is not None: + _ensure_valid_step(step) + svc = get_prompt_service() + svc.reset_to_defaults(name, step=step) + return {"status": "ok", "profile": name, "reset_step": step or "all"} + + +@router.put("/profiles/{name}/{step}") +def update_prompt(name: str, step: str, body: PromptUpdateRequest): + _ensure_valid_name(name) + _ensure_valid_step(step) + svc = get_prompt_service() + svc.update_prompt(name, step, body.template) + return {"status": "ok", "profile": name, "step": step} diff --git a/backend/app/services/prompt_service.py b/backend/app/services/prompt_service.py new file mode 100644 index 0000000..7e605a6 
--- /dev/null +++ b/backend/app/services/prompt_service.py @@ -0,0 +1,168 @@ +"""Prompt profile management service. + +Reads and writes prompt templates in the prompts SQLite database. +Uses sync sqlite3 โ€” all operations are instant local reads/writes. +""" + +import logging +import sqlite3 + +from app.core.sqlite_db import _SEED_TEMPLATES + +logger = logging.getLogger(__name__) + +_VALID_NAMES = {"A", "B", "C"} +_VALID_STEPS = {"decompose", "filter", "generate"} + + +def _connect(db_path: str) -> sqlite3.Connection: + conn = sqlite3.connect(db_path) + conn.row_factory = sqlite3.Row + return conn + + +class PromptService: + """CRUD operations for prompt profiles and templates. + + Each method opens its own connection so the service is safe to + instantiate once per request without holding open file handles. + """ + + def __init__(self, db_path: str) -> None: + self._db_path = db_path + + # โ”€โ”€ helpers โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + + def _validate_name(self, name: str) -> None: + if name not in _VALID_NAMES: + raise ValueError(f"Invalid profile name '{name}'. Must be one of A, B, C.") + + def _validate_step(self, step: str) -> None: + if step not in _VALID_STEPS: + raise ValueError(f"Invalid step '{step}'. 
Must be one of decompose, filter, generate.") + + # โ”€โ”€ read operations โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + + def get_active_profile_name(self) -> str: + """Return the name of the currently active profile.""" + with _connect(self._db_path) as conn: + row = conn.execute( + "SELECT name FROM system_prompt_profiles WHERE is_active=1" + ).fetchone() + if row is None: + raise RuntimeError("No active prompt profile found.") + return row["name"] + + def get_prompt_template(self, step: str) -> str: + """Return the prompt template for *step* of the active profile.""" + self._validate_step(step) + with _connect(self._db_path) as conn: + row = conn.execute( + """ + SELECT sp.prompt_template + FROM system_prompts sp + JOIN system_prompt_profiles spp ON sp.profile_id = spp.id + WHERE spp.is_active=1 AND sp.step_name=? + """, + (step,), + ).fetchone() + if row is None: + raise RuntimeError(f"No template found for step '{step}'.") + return row["prompt_template"] + + def list_profiles(self) -> list[dict]: + """Return all profiles with their active status.""" + with _connect(self._db_path) as conn: + rows = conn.execute( + "SELECT name, is_active FROM system_prompt_profiles ORDER BY name" + ).fetchall() + return [{"name": r["name"], "is_active": bool(r["is_active"])} for r in rows] + + def get_profile_prompts(self, name: str) -> dict: + """Return all three prompt templates for the given profile.""" + self._validate_name(name) + with _connect(self._db_path) as conn: + rows = conn.execute( + """ + SELECT sp.step_name, sp.prompt_template + FROM system_prompts sp + JOIN system_prompt_profiles spp ON sp.profile_id = spp.id + WHERE spp.name=? 
+ ORDER BY sp.step_name + """, + (name,), + ).fetchall() + return {r["step_name"]: r["prompt_template"] for r in rows} + + # โ”€โ”€ write operations โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + + def activate_profile(self, name: str) -> None: + """Set *name* as the active profile (deactivates all others).""" + self._validate_name(name) + with _connect(self._db_path) as conn: + conn.execute("UPDATE system_prompt_profiles SET is_active=0") + conn.execute( + "UPDATE system_prompt_profiles SET is_active=1 WHERE name=?", + (name,), + ) + conn.commit() + logger.info("Activated prompt profile '%s'.", name) + + def update_prompt(self, name: str, step: str, template: str) -> None: + """Update a single prompt template for the given profile.""" + self._validate_name(name) + self._validate_step(step) + with _connect(self._db_path) as conn: + conn.execute( + """ + UPDATE system_prompts + SET prompt_template=?, updated_at=datetime('now') + WHERE profile_id=(SELECT id FROM system_prompt_profiles WHERE name=?) + AND step_name=? + """, + (template, name, step), + ) + conn.commit() + logger.info("Updated prompt: profile='%s' step='%s'.", name, step) + + def update_all_prompts(self, name: str, prompts: dict[str, str]) -> None: + """Batch-update all three prompt templates for the given profile.""" + self._validate_name(name) + for step in prompts: + self._validate_step(step) + with _connect(self._db_path) as conn: + for step, template in prompts.items(): + conn.execute( + """ + UPDATE system_prompts + SET prompt_template=?, updated_at=datetime('now') + WHERE profile_id=(SELECT id FROM system_prompt_profiles WHERE name=?) + AND step_name=? 
+ """, + (template, name, step), + ) + conn.commit() + logger.info("Batch-updated all prompts for profile '%s'.", name) + + def reset_to_defaults(self, name: str, step: str | None = None) -> None: + """Reset prompt template(s) to the built-in seed defaults. + + If *step* is ``None``, all three templates are reset. + """ + self._validate_name(name) + steps = _VALID_STEPS if step is None else {step} + for s in steps: + self._validate_step(s) + with _connect(self._db_path) as conn: + for s in steps: + conn.execute( + """ + UPDATE system_prompts + SET prompt_template=?, updated_at=datetime('now') + WHERE profile_id=(SELECT id FROM system_prompt_profiles WHERE name=?) + AND step_name=? + """, + (_SEED_TEMPLATES[s], name, s), + ) + conn.commit() + logger.info("Reset prompts for profile '%s': steps=%s.", name, steps) diff --git a/backend/app/test/conftest.py b/backend/app/test/conftest.py index 12b0798..42323a1 100644 --- a/backend/app/test/conftest.py +++ b/backend/app/test/conftest.py @@ -30,3 +30,41 @@ def mock_asr_client(monkeypatch): def chroma_test_dir(tmp_path): """Provide a temporary directory for isolated ChromaDB instances.""" return tmp_path / "chroma_test" + + +@pytest.fixture +def mock_prompt_service(): + """Mock PromptService for tests that don't need real DB.""" + class _MockPromptService: + def __init__(self): + self._template = "Test template: {question}" + + def get_active_profile_name(self) -> str: + return "A" + + def get_prompt_template(self, step: str) -> str: + return self._template + + def list_profiles(self) -> list[dict]: + return [ + {"name": "A", "is_active": True}, + {"name": "B", "is_active": False}, + {"name": "C", "is_active": False}, + ] + + def activate_profile(self, name: str) -> None: + pass + + def get_profile_prompts(self, name: str) -> dict: + return {"decompose": self._template, "filter": self._template, "generate": self._template} + + def update_prompt(self, name: str, step: str, template: str) -> None: + pass + + def 
update_all_prompts(self, name: str, prompts: dict[str, str]) -> None: + pass + + def reset_to_defaults(self, name: str, step: str | None = None) -> None: + pass + + return _MockPromptService() diff --git a/backend/app/test/test_phase3_prompt_service.py b/backend/app/test/test_phase3_prompt_service.py new file mode 100644 index 0000000..16411bc --- /dev/null +++ b/backend/app/test/test_phase3_prompt_service.py @@ -0,0 +1,248 @@ +"""Tests for Package 3.2 PromptService โ€” CRUD for prompt profiles and templates. + +Uses real sqlite3 with tmp_path for full isolation. No mocks. +Each test gets its own fresh database seeded with A/B/C profiles. +""" + +import sqlite3 + +import pytest + +from app.core.sqlite_db import _SEED_TEMPLATES, init_prompts_db, seed_default_profiles +from app.services.prompt_service import PromptService + + +# โ”€โ”€ Helper โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + + +def _create_service(tmp_path) -> PromptService: + db_path = str(tmp_path / "test.db") + conn = sqlite3.connect(db_path) + conn.row_factory = sqlite3.Row + conn.execute("PRAGMA foreign_keys=ON") + init_prompts_db(conn) + seed_default_profiles(conn) + conn.close() + return PromptService(db_path=db_path) + + +# โ”€โ”€ list_profiles โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + + +def test_list_profiles_returns_abc_with_a_active(tmp_path): + svc = _create_service(tmp_path) + profiles = svc.list_profiles() + + assert len(profiles) == 3 + names = [p["name"] for p in profiles] + assert names == ["A", "B", "C"] + + active_map = {p["name"]: p["is_active"] for p in profiles} + assert active_map["A"] is True + assert active_map["B"] is False + assert active_map["C"] is False + + +# โ”€โ”€ activate_profile 
โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + + +def test_activate_profile_b(tmp_path): + svc = _create_service(tmp_path) + svc.activate_profile("B") + + profiles = svc.list_profiles() + active_map = {p["name"]: p["is_active"] for p in profiles} + assert active_map["A"] is False + assert active_map["B"] is True + assert active_map["C"] is False + + +def test_activate_profile_c(tmp_path): + svc = _create_service(tmp_path) + svc.activate_profile("C") + + profiles = svc.list_profiles() + active_map = {p["name"]: p["is_active"] for p in profiles} + assert active_map["A"] is False + assert active_map["B"] is False + assert active_map["C"] is True + + +def test_activate_profile_invalid_name_raises(tmp_path): + svc = _create_service(tmp_path) + with pytest.raises(ValueError, match="Invalid profile name"): + svc.activate_profile("D") + + +# โ”€โ”€ get_active_profile_name โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + + +def test_get_active_profile_name_returns_a_after_seed(tmp_path): + svc = _create_service(tmp_path) + assert svc.get_active_profile_name() == "A" + + +def test_get_active_profile_name_after_switch(tmp_path): + svc = _create_service(tmp_path) + svc.activate_profile("B") + assert svc.get_active_profile_name() == "B" + + +# โ”€โ”€ get_profile_prompts โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + + +def test_get_profile_prompts_returns_all_three_steps(tmp_path): + svc = _create_service(tmp_path) + prompts = svc.get_profile_prompts("A") + + assert set(prompts.keys()) == {"decompose", "filter", "generate"} + assert "{question}" in prompts["decompose"] + assert "{question}" in prompts["filter"] + assert "{question}" in prompts["generate"] 
+ + +def test_get_profile_prompts_invalid_name_raises(tmp_path): + svc = _create_service(tmp_path) + with pytest.raises(ValueError, match="Invalid profile name"): + svc.get_profile_prompts("D") + + +# โ”€โ”€ get_prompt_template โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + + +def test_get_prompt_template_for_active_profile(tmp_path): + svc = _create_service(tmp_path) + template = svc.get_prompt_template("decompose") + assert template == _SEED_TEMPLATES["decompose"] + + +def test_get_prompt_template_after_activate(tmp_path): + svc = _create_service(tmp_path) + svc.update_prompt("B", "decompose", "B custom template") + svc.activate_profile("B") + assert svc.get_prompt_template("decompose") == "B custom template" + + +def test_get_prompt_template_invalid_step_raises(tmp_path): + svc = _create_service(tmp_path) + with pytest.raises(ValueError, match="Invalid step"): + svc.get_prompt_template("nonexistent") + + +# โ”€โ”€ update_prompt โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + + +def test_update_prompt_persists_change(tmp_path): + svc = _create_service(tmp_path) + new_template = "Custom decompose prompt for {question}" + + svc.update_prompt("A", "decompose", new_template) + prompts = svc.get_profile_prompts("A") + + assert prompts["decompose"] == new_template + assert prompts["filter"] == _SEED_TEMPLATES["filter"] + assert prompts["generate"] == _SEED_TEMPLATES["generate"] + + +def test_update_prompt_invalid_name_raises(tmp_path): + svc = _create_service(tmp_path) + with pytest.raises(ValueError, match="Invalid profile name"): + svc.update_prompt("D", "decompose", "template") + + +def test_update_prompt_invalid_step_raises(tmp_path): + svc = _create_service(tmp_path) + with pytest.raises(ValueError, match="Invalid step"): + 
svc.update_prompt("A", "nonexistent", "template") + + +# โ”€โ”€ update_all_prompts โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + + +def test_update_all_prompts_batch(tmp_path): + svc = _create_service(tmp_path) + new_prompts = { + "decompose": "New decompose", + "filter": "New filter", + "generate": "New generate", + } + + svc.update_all_prompts("A", new_prompts) + prompts = svc.get_profile_prompts("A") + + assert prompts["decompose"] == "New decompose" + assert prompts["filter"] == "New filter" + assert prompts["generate"] == "New generate" + + +def test_update_all_prompts_invalid_name_raises(tmp_path): + svc = _create_service(tmp_path) + with pytest.raises(ValueError, match="Invalid profile name"): + svc.update_all_prompts("D", {"decompose": "x"}) + + +def test_update_all_prompts_invalid_step_raises(tmp_path): + svc = _create_service(tmp_path) + with pytest.raises(ValueError, match="Invalid step"): + svc.update_all_prompts("A", {"nonexistent": "x"}) + + +# โ”€โ”€ reset_to_defaults โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + + +def test_reset_to_defaults_all_steps(tmp_path): + svc = _create_service(tmp_path) + svc.update_all_prompts("A", { + "decompose": "MODIFIED decompose", + "filter": "MODIFIED filter", + "generate": "MODIFIED generate", + }) + + svc.reset_to_defaults("A", step=None) + + prompts = svc.get_profile_prompts("A") + assert prompts["decompose"] == _SEED_TEMPLATES["decompose"] + assert prompts["filter"] == _SEED_TEMPLATES["filter"] + assert prompts["generate"] == _SEED_TEMPLATES["generate"] + + +def test_reset_to_defaults_single_step(tmp_path): + svc = _create_service(tmp_path) + svc.update_all_prompts("A", { + "decompose": "MODIFIED decompose", + "filter": "MODIFIED filter", + "generate": "MODIFIED generate", + }) + + 
svc.reset_to_defaults("A", step="filter") + + prompts = svc.get_profile_prompts("A") + assert prompts["decompose"] == "MODIFIED decompose" + assert prompts["filter"] == _SEED_TEMPLATES["filter"] + assert prompts["generate"] == "MODIFIED generate" + + +def test_reset_to_defaults_invalid_name_raises(tmp_path): + svc = _create_service(tmp_path) + with pytest.raises(ValueError, match="Invalid profile name"): + svc.reset_to_defaults("D") + + +# โ”€โ”€ Edge cases โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + + +def test_empty_string_template_allowed(tmp_path): + svc = _create_service(tmp_path) + svc.update_prompt("A", "decompose", "") + prompts = svc.get_profile_prompts("A") + assert prompts["decompose"] == "" + + +def test_very_long_template_allowed(tmp_path): + svc = _create_service(tmp_path) + long_template = "x" * 50_000 + + svc.update_prompt("A", "decompose", long_template) + prompts = svc.get_profile_prompts("A") + assert prompts["decompose"] == long_template + assert len(prompts["decompose"]) == 50_000 diff --git a/backend/app/test/test_phase3_prompts_router.py b/backend/app/test/test_phase3_prompts_router.py new file mode 100644 index 0000000..1be95a4 --- /dev/null +++ b/backend/app/test/test_phase3_prompts_router.py @@ -0,0 +1,216 @@ +"""Tests for Package 3.2 prompts router โ€” HTTP endpoint integration tests. + +Uses real sqlite3 with tmp_path. TestClient hits a minimal FastAPI app +wired with the prompts router. No mocks on the DB layer. 
+""" + +import pytest +from fastapi import FastAPI +from fastapi.testclient import TestClient + +from app.core.sqlite_db import init_prompts_db, seed_default_profiles, _get_db +from app.routers.prompts import router + + +# โ”€โ”€ Fixture โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + + +@pytest.fixture +def client(tmp_path, monkeypatch): + prompts_path = str(tmp_path / "prompts.db") + monkeypatch.setenv("PROMPTS_DB_PATH", prompts_path) + monkeypatch.setenv("HISTORY_DB_PATH", str(tmp_path / "history.db")) + + from app.core.config import get_settings + get_settings.cache_clear() + from app.core.dependencies import get_settings_cached + get_settings_cached.cache_clear() + + conn = _get_db(prompts_path) + init_prompts_db(conn) + seed_default_profiles(conn) + conn.close() + + test_app = FastAPI() + test_app.include_router(router) + + yield TestClient(test_app) + + get_settings_cached.cache_clear() + get_settings.cache_clear() + + +# โ”€โ”€ GET /profiles โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + + +def test_get_profiles_returns_200_with_three_items(client): + resp = client.get("/api/v1/prompts/profiles") + assert resp.status_code == 200 + + data = resp.json() + assert len(data["profiles"]) == 3 + + names = [p["name"] for p in data["profiles"]] + assert names == ["A", "B", "C"] + + active_map = {p["name"]: p["is_active"] for p in data["profiles"]} + assert active_map["A"] is True + assert active_map["B"] is False + assert active_map["C"] is False + + +# โ”€โ”€ GET /profiles/{name} โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + + +def test_get_profile_prompts_a_returns_200(client): + resp = 
client.get("/api/v1/prompts/profiles/A") + assert resp.status_code == 200 + + data = resp.json() + assert data["profile_name"] == "A" + assert set(data["prompts"].keys()) == {"decompose", "filter", "generate"} + + +def test_get_profile_prompts_invalid_returns_400(client): + resp = client.get("/api/v1/prompts/profiles/D") + assert resp.status_code == 400 + + +# โ”€โ”€ PUT /profiles/{name}/activate โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + + +def test_activate_profile_b_then_get_confirms(client): + resp = client.put("/api/v1/prompts/profiles/B/activate") + assert resp.status_code == 200 + assert resp.json()["active_profile"] == "B" + + resp = client.get("/api/v1/prompts/profiles") + active_map = {p["name"]: p["is_active"] for p in resp.json()["profiles"]} + assert active_map["B"] is True + assert active_map["A"] is False + assert active_map["C"] is False + + +# โ”€โ”€ PUT /profiles/{name}/{step} โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + + +def test_update_prompt_returns_200_and_persists(client): + new_template = "Updated decompose for {question}" + + resp = client.put( + "/api/v1/prompts/profiles/A/decompose", + json={"template": new_template}, + ) + assert resp.status_code == 200 + assert resp.json()["step"] == "decompose" + + resp = client.get("/api/v1/prompts/profiles/A") + assert resp.json()["prompts"]["decompose"] == new_template + + +# โ”€โ”€ PUT /profiles/{name}/all โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + + +def test_update_all_prompts_batch_returns_200_and_persists(client): + new_prompts = { + "decompose": "Batch decompose", + "filter": "Batch filter", + "generate": "Batch generate", + } + + resp = client.put( + "/api/v1/prompts/profiles/A/all", + json={"prompts": new_prompts}, + ) + 
assert resp.status_code == 200 + assert resp.json()["profile"] == "A" + + resp = client.get("/api/v1/prompts/profiles/A") + prompts = resp.json()["prompts"] + assert prompts["decompose"] == "Batch decompose" + assert prompts["filter"] == "Batch filter" + assert prompts["generate"] == "Batch generate" + + +# โ”€โ”€ PUT /profiles/{name}/reset โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + + +def test_reset_single_step_returns_200(client): + from app.core.sqlite_db import _SEED_TEMPLATES + + client.put( + "/api/v1/prompts/profiles/A/decompose", + json={"template": "MODIFIED"}, + ) + + resp = client.put( + "/api/v1/prompts/profiles/A/reset", + json={"step": "decompose"}, + ) + assert resp.status_code == 200 + assert resp.json()["reset_step"] == "decompose" + + resp = client.get("/api/v1/prompts/profiles/A") + assert resp.json()["prompts"]["decompose"] == _SEED_TEMPLATES["decompose"] + + +def test_reset_all_steps_returns_200(client): + from app.core.sqlite_db import _SEED_TEMPLATES + + client.put("/api/v1/prompts/profiles/A/all", json={"prompts": { + "decompose": "MODIFIED decompose", + "filter": "MODIFIED filter", + "generate": "MODIFIED generate", + }}) + + resp = client.put( + "/api/v1/prompts/profiles/A/reset", + json={"step": None}, + ) + assert resp.status_code == 200 + assert resp.json()["reset_step"] == "all" + + resp = client.get("/api/v1/prompts/profiles/A") + prompts = resp.json()["prompts"] + assert prompts["decompose"] == _SEED_TEMPLATES["decompose"] + assert prompts["filter"] == _SEED_TEMPLATES["filter"] + assert prompts["generate"] == _SEED_TEMPLATES["generate"] + + +def test_reset_with_no_body_resets_all(client): + from app.core.sqlite_db import _SEED_TEMPLATES + + client.put("/api/v1/prompts/profiles/A/all", json={"prompts": { + "decompose": "MODIFIED", + "filter": "MODIFIED", + "generate": "MODIFIED", + }}) + + resp = client.put("/api/v1/prompts/profiles/A/reset") + 
assert resp.status_code == 200 + assert resp.json()["reset_step"] == "all" + + resp = client.get("/api/v1/prompts/profiles/A") + prompts = resp.json()["prompts"] + assert prompts["decompose"] == _SEED_TEMPLATES["decompose"] + assert prompts["filter"] == _SEED_TEMPLATES["filter"] + assert prompts["generate"] == _SEED_TEMPLATES["generate"] + + +# โ”€โ”€ Validation: invalid name and step โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ + + +def test_invalid_profile_name_returns_400(client): + resp = client.get("/api/v1/prompts/profiles/D") + assert resp.status_code == 400 + + resp = client.put("/api/v1/prompts/profiles/D/activate") + assert resp.status_code == 400 + + +def test_invalid_step_returns_400(client): + resp = client.put( + "/api/v1/prompts/profiles/A/nonexistent", + json={"template": "test"}, + ) + assert resp.status_code == 400