From 3b868a01330e9bbe878b174aa29e35fcc999e792 Mon Sep 17 00:00:00 2001 From: Woody Date: Mon, 27 Apr 2026 11:14:27 +0800 Subject: [PATCH] feat(prompts): integrate filter_per_subq with PromptService, fix seed bugs, restructure UI Break the hardcoded per-sub-q filter prompt into 3 editable PromptService templates (filter_intro, filter_section, filter_outro) with placeholders for the for-loop iteration pattern. Refactor RelevanceFilter._build_per_subq_prompt() to compose them at runtime, falling back to built-in defaults when PromptService is unavailable. Fix two latent bugs from Package 4: - generate_per_subq was called by rag.py but never added to _VALID_STEPS or DB seed (would ValueError at runtime) - _SEED_GENERATE placeholder mismatch: flat generate_response() expects {question}/{context} but Package 4 changed it to {context_sections}. Restored flat template; generate_per_subq now holds {context_sections}. Add database backfill migration in seed_default_profiles() to INSERT OR IGNORE missing steps into existing profile rows, ensuring all 7 steps exist on restart. Restructure System Prompts UI: remove unused flat filter/generate steps, replace with Step 2.1-2.3 (filter_intro/section/outro) and Step 3 (generate_per_subq). Update PlaceholderDocs with {context_sections}, {subq_idx}, {subq_question}. Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent) Co-authored-by: Sisyphus --- .plans/package4_enhancement_plan.md | 69 ++++++++++++++++++- backend/app/core/sqlite_db.py | 61 +++++++++++++--- backend/app/routers/prompts.py | 10 ++- backend/app/services/prompt_service.py | 10 ++- backend/app/services/relevance_filter.py | 48 +++++++++---- backend/app/test/conftest.py | 9 +++ .../app/test/test_phase3_prompt_service.py | 9 ++- .../app/test/test_phase3_prompts_router.py | 5 +- backend/app/test/test_phase3_sqlite_db.py | 66 +++++++++++++++--- .../app/test/test_phase4_prompt_templates.py | 43 +++++++----- frontend/src/components/PlaceholderDocs.tsx | 5 +- frontend/src/components/PromptEditor.tsx | 10 ++- .../test/components/PlaceholderDocs.test.tsx | 12 +++- .../src/test/components/PromptEditor.test.tsx | 63 +++++++++-------- 14 files changed, 333 insertions(+), 87 deletions(-) diff --git a/.plans/package4_enhancement_plan.md b/.plans/package4_enhancement_plan.md index cad3fe5..29c078d 100644 --- a/.plans/package4_enhancement_plan.md +++ b/.plans/package4_enhancement_plan.md @@ -2,7 +2,7 @@ **Source**: User request (2026-04-26) **Scope**: Refactor the 3-step RAG query pipeline so retrieval, filtering, and response generation are organized per sub-question instead of batch-flattened. -**Status**: ✅ Complete — All 7 sub-phases implemented (2026-04-26) +**Status**: ✅ Complete — All 7 sub-phases implemented (2026-04-26). Phase 4a Prompt Integration added (2026-04-27). --- @@ -711,6 +711,72 @@ Minor enhancement: --- +## Phase 4a: Prompt Service Integration for Per-Sub-Q Filter (2026-04-27) + +**Root issue**: `filter_per_subquestion()` in `relevance_filter.py` had a hardcoded prompt (`_build_per_subq_prompt()`) — completely bypassing `PromptService`. Users could not edit the per-sub-q filter prompt on the System Prompts page, unlike the flat `filter` step which was already prompt-service-driven. + +**Solution**: Broke the per-sub-q filter prompt into **3 composable pieces**, each a separately editable step on the System Prompts page: + +| Step Name | Label | Placeholders | Default | +|-----------|-------|-------------|---------| +| `filter_intro` | Step 2.1: Filter Intro (Preamble) | *(none)* | `"Evaluate each chunk for relevance to its associated sub-question only."` | +| `filter_section` | Step 2.2: Filter Section (Per Sub-Q) | `{subq_idx}`, `{subq_question}`, `{chunks}` | `'Sub-question {subq_idx}: "{subq_question}"\n{chunks}'` | +| `filter_outro` | Step 2.3: Filter Outro (Format) | *(none)* | JSON format instructions + example | + +The `RelevanceFilter._build_per_subq_prompt()` now composes them at runtime: +``` +filter_intro + [filter_section.replace(...) for each sub-q] + filter_outro +``` +Falls back to built-in defaults when `PromptService` is unavailable. + +### Bugs Fixed + +1. **`generate_per_subq` not seeded**: `rag.py` called `get_prompt_template("generate_per_subq")` but this step name was never added to `_VALID_STEPS`, `_SEED_STEPS`, or `_SEED_TEMPLATES` — would crash at runtime with `ValueError`. Now properly seeded with `{context_sections}` placeholder. + +2. **`_SEED_GENERATE` placeholder mismatch from Package 4**: The flat `generate_response()` expects `{question}`/`{context}` placeholders, but Package 4 changed the seed template to use `{context_sections}` (intended for per-sub-q generate). Restored flat template; `generate_per_subq` now holds `{context_sections}`. + +### Database Backfill Migration + +The existing `seed_default_profiles()` only inserted steps for NEWLY created profiles. Added a backfill loop that iterates ALL existing profiles and `INSERT OR IGNORE`s any missing step names. This ensures existing A/B/C profiles pick up `filter_intro`, `filter_section`, `filter_outro`, and `generate_per_subq` on restart. + +### System Prompts UI Restructured + +The flat `filter` and `generate` steps were removed from the UI (they're unused by the current pipeline). The page now shows 5 steps: + +| UI Order | Label | Step Key | +|----------|-------|----------| +| 1 | Step 1: Query Decomposition | `decompose` | +| 2 | Step 2.1: Filter Intro (Preamble) | `filter_intro` | +| 3 | Step 2.2: Filter Section (Per Sub-Q) | `filter_section` | +| 4 | Step 2.3: Filter Outro (Format) | `filter_outro` | +| 5 | Step 3: Generate (Per-Sub-Question) | `generate_per_subq` | + +The old `filter` and `generate` templates remain in the DB (for API backward compatibility) but are hidden from the UI. + +### Files Changed + +| File | Change | +|------|--------| +| `backend/app/core/sqlite_db.py` | 3 new seed templates + `generate_per_subq` seed; backfill migration; restored `_SEED_GENERATE` to `{question}`/`{context}` | +| `backend/app/services/prompt_service.py` | Added 4 step names to `_VALID_STEPS` | +| `backend/app/routers/prompts.py` | Added 4 step names to `_VALID_STEPS` | +| `backend/app/services/relevance_filter.py` | Refactored `_build_per_subq_prompt()` to use PromptService + built-in fallback constants | +| `frontend/src/components/PromptEditor.tsx` | Replaced unused flat steps with 5-step per-sub-q layout (Step 2.1-2.3 + Step 3) | +| `frontend/src/components/PlaceholderDocs.tsx` | Added `{context_sections}`, `{subq_idx}`, `{subq_question}` docs | +| `backend/app/test/conftest.py` | Added 4 new templates to mock | +| `backend/app/test/test_phase3_sqlite_db.py` | Updated counts (9→21 prompts) and placeholder assertions | +| `backend/app/test/test_phase3_prompt_service.py` | Updated step set + placeholder assertions | +| `backend/app/test/test_phase3_prompts_router.py` | Updated step set assertion | +| `backend/app/test/test_phase4_prompt_templates.py` | Updated for split generate/generate_per_subq | +| `frontend/src/test/components/PromptEditor.test.tsx` | Updated to 5 textareas, new labels, new placeholder layout | +| `frontend/src/test/components/PlaceholderDocs.test.tsx` | Updated to 6 placeholders | + +### Test Results (Post-Phase 4a) +- **Backend**: 295 passed, 5 skipped (pre-existing) +- **Frontend**: 182 passed, 1 pre-existing failure (unrelated `file-input` e2e) + +--- + ## Sub-Phase Summary | Sub-Phase | Scope | Backend | Frontend | Tests | Status | @@ -722,6 +788,7 @@ Minor enhancement: | 4.5 | Frontend types + state | None | `types/index.ts`, `lib/queries.tsx` | `test_phase4_stream_state.test.tsx`, `test_phase4_types.test.ts` | ✅ Complete | | 4.6 | Frontend rendering | None | `ResponsePanel.tsx`, `citationParser.ts`, `ExtractedQuestionsDisplay.tsx` | `test_phase4_response_panel.test.tsx`, `test_phase4_citation_parser.test.ts` | ✅ Complete | | 4.7 | Testing & polish | All affected files | All affected files | Integration + acceptance + e2e tests | ✅ Complete | +| 4a | Prompt service integration for filter_per_subq | `sqlite_db.py`, `prompt_service.py`, `prompts.py`, `relevance_filter.py` | `PromptEditor.tsx`, `PlaceholderDocs.tsx` | Updated 7 test files, 13 total files changed | ✅ Complete | --- diff --git a/backend/app/core/sqlite_db.py b/backend/app/core/sqlite_db.py index 52af255..1c5a485 100644 --- a/backend/app/core/sqlite_db.py +++ b/backend/app/core/sqlite_db.py @@ -22,28 +22,60 @@ _SEED_FILTER = ( ) _SEED_GENERATE = ( - "You must answer each sub-question using ONLY the document chunks provided for it.\n" - "Do not use any external knowledge.\n" - "Format your answer as markdown sections — one section per sub-question.\n" - "Each section should start with \"## Sub-question N: \"\n" - "Each section should contain 1-5 bullet points.\n" - "Cite your sources inline using bracket labels, e.g. [filename, page N].\n" - "Place the citation at the end of each relevant bullet point.\n\n" + "Question: {question}\n\n" + "Answer the question using ONLY these document chunks. " + "Do not use any external knowledge. " + "Format your answer as bullet points. " + "Cite your sources inline using bracket labels, e.g. [filename, page N]. " + "Place the citation at the end of each relevant point.\n\n" + "Document chunks:\n{context}\n\n" + "Answer:" +) + +_SEED_GENERATE_PER_SUBQ = ( + "Answer each sub-question using ONLY its document chunks.\n" + "Format as markdown sections with ## Sub-question N: headers.\n" "{context_sections}\n\n" "Answer:" ) +_SEED_FILTER_INTRO = ( + "Evaluate each chunk for relevance to its associated sub-question only." +) + +_SEED_FILTER_SECTION = ( + '\nSub-question {subq_idx}: "{subq_question}"\n{chunks}' +) + +_SEED_FILTER_OUTRO = ( + "\nFor each chunk, rate its relevance 0-10 considering ONLY its associated sub-question.\n" + 'Return a JSON object mapping sub-question indices to arrays of scores.\n' + 'Example: {"0": [8.5, 3.2, 9.0], "1": [7.0, 9.1]}' +) + _SEED_PROFILES = [ ("A", 1), ("B", 0), ("C", 0), ] -_SEED_STEPS = ["decompose", "filter", "generate"] +_SEED_STEPS = [ + "decompose", + "filter", + "generate", + "generate_per_subq", + "filter_intro", + "filter_section", + "filter_outro", +] _SEED_TEMPLATES = { "decompose": _SEED_DECOMPOSE, "filter": _SEED_FILTER, "generate": _SEED_GENERATE, + "generate_per_subq": _SEED_GENERATE_PER_SUBQ, + "filter_intro": _SEED_FILTER_INTRO, + "filter_section": _SEED_FILTER_SECTION, + "filter_outro": _SEED_FILTER_OUTRO, } # ── Connection factories ──────────────────────────────────────────────────── @@ -156,5 +188,18 @@ def seed_default_profiles(conn: sqlite3.Connection) -> None: (profile_id, step, _SEED_TEMPLATES[step]), ) + # Backfill: add any newly-added step names to existing profiles that were + # created before the new steps existed. INSERT OR IGNORE is idempotent. + profile_rows = conn.execute( + "SELECT id FROM system_prompt_profiles" + ).fetchall() + for row in profile_rows: + for step in _SEED_STEPS: + if step in _SEED_TEMPLATES: + conn.execute( + "INSERT OR IGNORE INTO system_prompts (profile_id, step_name, prompt_template) VALUES (?, ?, ?)", + (row["id"], step, _SEED_TEMPLATES[step]), + ) + conn.commit() logger.info("Default profiles (A/B/C) seeded.") diff --git a/backend/app/routers/prompts.py b/backend/app/routers/prompts.py index 7d43d21..96a56de 100644 --- a/backend/app/routers/prompts.py +++ b/backend/app/routers/prompts.py @@ -17,7 +17,15 @@ logger = logging.getLogger(__name__) router = APIRouter(prefix="/api/v1/prompts", tags=["prompts"]) _VALID_NAMES = {"A", "B", "C"} -_VALID_STEPS = {"decompose", "filter", "generate"} +_VALID_STEPS = { + "decompose", + "filter", + "generate", + "generate_per_subq", + "filter_intro", + "filter_section", + "filter_outro", +} def _ensure_valid_name(name: str) -> None: diff --git a/backend/app/services/prompt_service.py b/backend/app/services/prompt_service.py index 7e605a6..8025002 100644 --- a/backend/app/services/prompt_service.py +++ b/backend/app/services/prompt_service.py @@ -12,7 +12,15 @@ from app.core.sqlite_db import _SEED_TEMPLATES logger = logging.getLogger(__name__) _VALID_NAMES = {"A", "B", "C"} -_VALID_STEPS = {"decompose", "filter", "generate"} +_VALID_STEPS = { + "decompose", + "filter", + "generate", + "generate_per_subq", + "filter_intro", + "filter_section", + "filter_outro", +} def _connect(db_path: str) -> sqlite3.Connection: diff --git a/backend/app/services/relevance_filter.py b/backend/app/services/relevance_filter.py index 07274d3..6307cd8 100644 --- a/backend/app/services/relevance_filter.py +++ b/backend/app/services/relevance_filter.py @@ -16,6 +16,18 @@ _BUILTIN_FILTER_TEMPLATE = ( "Return JSON array of scores.\n{chunks}\n" ) +_BUILTIN_FILTER_INTRO = "Evaluate each chunk for relevance to its associated sub-question only." + +_BUILTIN_FILTER_SECTION = ( + '\nSub-question {subq_idx}: "{subq_question}"\n{chunks}' +) + +_BUILTIN_FILTER_OUTRO = ( + "\nFor each chunk, rate its relevance 0-10 considering ONLY its associated sub-question.\n" + 'Return a JSON object mapping sub-question indices to arrays of scores.\n' + 'Example: {"0": [8.5, 3.2, 9.0], "1": [7.0, 9.1]}' +) + def _extract_json_from_markdown(response: str) -> str: if not isinstance(response, str): @@ -109,22 +121,28 @@ class RelevanceFilter: sub_questions: List[str], sub_chunks: List[List[Tuple[str, Dict]]], ) -> str: - sections: List[str] = [ - "Evaluate each chunk for relevance to its associated sub-question only." - ] - for idx, (sq, chunks) in enumerate(zip(sub_questions, sub_chunks)): - sections.append(f'\nSub-question {idx}: "{sq}"') - for c_idx, (text, _meta) in enumerate(chunks): - sections.append(f"Chunk {c_idx}: {text}") + if self._prompt_service is not None: + intro = self._prompt_service.get_prompt_template("filter_intro") + section = self._prompt_service.get_prompt_template("filter_section") + outro = self._prompt_service.get_prompt_template("filter_outro") + else: + intro = _BUILTIN_FILTER_INTRO + section = _BUILTIN_FILTER_SECTION + outro = _BUILTIN_FILTER_OUTRO - sections.append( - "\nFor each chunk, rate its relevance 0-10 considering ONLY its associated sub-question." - ) - sections.append( - 'Return a JSON object mapping sub-question indices to arrays of scores.\n' - 'Example: {"0": [8.5, 3.2, 9.0], "1": [7.0, 9.1]}' - ) - return "\n".join(sections) + parts: List[str] = [intro] + for idx, (sq, chunks) in enumerate(zip(sub_questions, sub_chunks)): + formatted_chunks = "\n".join( + f"Chunk {c_idx}: {text}" for c_idx, (text, _meta) in enumerate(chunks) + ) + parts.append( + section + .replace("{subq_idx}", str(idx)) + .replace("{subq_question}", sq) + .replace("{chunks}", formatted_chunks) + ) + parts.append(outro) + return "\n".join(parts) async def filter_per_subquestion( self, diff --git a/backend/app/test/conftest.py b/backend/app/test/conftest.py index 72f1044..71ad424 100644 --- a/backend/app/test/conftest.py +++ b/backend/app/test/conftest.py @@ -66,6 +66,15 @@ def mock_prompt_service(): "{context_sections}\n\n" "Answer:" ), + "filter_intro": "Evaluate each chunk for relevance to its associated sub-question only.", + "filter_section": ( + '\nSub-question {subq_idx}: "{subq_question}"\n{chunks}' + ), + "filter_outro": ( + "\nFor each chunk, rate its relevance 0-10 considering ONLY its associated sub-question.\n" + 'Return a JSON object mapping sub-question indices to arrays of scores.\n' + 'Example: {"0": [8.5, 3.2, 9.0], "1": [7.0, 9.1]}' + ), } class _MockPromptService: diff --git a/backend/app/test/test_phase3_prompt_service.py b/backend/app/test/test_phase3_prompt_service.py index baace5b..8315feb 100644 --- a/backend/app/test/test_phase3_prompt_service.py +++ b/backend/app/test/test_phase3_prompt_service.py @@ -95,10 +95,15 @@ def test_get_profile_prompts_returns_all_three_steps(tmp_path): svc = _create_service(tmp_path) prompts = svc.get_profile_prompts("A") - assert set(prompts.keys()) == {"decompose", "filter", "generate"} + assert set(prompts.keys()) == { + "decompose", "filter", "generate", + "generate_per_subq", "filter_intro", "filter_section", "filter_outro", + } assert "{question}" in prompts["decompose"] assert "{question}" in prompts["filter"] - assert "{context_sections}" in prompts["generate"] + assert "{question}" in prompts["generate"] + assert "{context}" in prompts["generate"] + assert "{context_sections}" in prompts["generate_per_subq"] def test_get_profile_prompts_invalid_name_raises(tmp_path): diff --git a/backend/app/test/test_phase3_prompts_router.py b/backend/app/test/test_phase3_prompts_router.py index 1be95a4..92bd1b0 100644 --- a/backend/app/test/test_phase3_prompts_router.py +++ b/backend/app/test/test_phase3_prompts_router.py @@ -68,7 +68,10 @@ def test_get_profile_prompts_a_returns_200(client): data = resp.json() assert data["profile_name"] == "A" - assert set(data["prompts"].keys()) == {"decompose", "filter", "generate"} + assert set(data["prompts"].keys()) == { + "decompose", "filter", "generate", + "generate_per_subq", "filter_intro", "filter_section", "filter_outro", + } def test_get_profile_prompts_invalid_returns_400(client): diff --git a/backend/app/test/test_phase3_sqlite_db.py b/backend/app/test/test_phase3_sqlite_db.py index 7372618..ae7be61 100644 --- a/backend/app/test/test_phase3_sqlite_db.py +++ b/backend/app/test/test_phase3_sqlite_db.py @@ -251,7 +251,7 @@ def test_seed_default_profiles_A_is_active(tmp_path, monkeypatch): def test_seed_default_profiles_creates_nine_prompts(tmp_path, monkeypatch): - """seed_default_profiles() should insert 9 prompt rows (3 profiles × 3 steps).""" + """seed_default_profiles() should insert 21 prompt rows (3 profiles × 7 steps).""" prompts_path = str(tmp_path / "prompts.db") history_path = str(tmp_path / "history.db") _patch_settings(monkeypatch, prompts_path, history_path) @@ -268,13 +268,17 @@ def test_seed_default_profiles_creates_nine_prompts(tmp_path, monkeypatch): JOIN system_prompt_profiles spp ON sp.profile_id = spp.id ORDER BY spp.name, sp.step_name""" ).fetchall() - assert len(rows) == 9 + assert len(rows) == 21 - # Each profile should have all 3 steps + # Each profile should have all 7 steps + expected_steps = { + "decompose", "filter", "generate", + "generate_per_subq", "filter_intro", "filter_section", "filter_outro", + } for profile in ("A", "B", "C"): profile_rows = [r for r in rows if r["profile_name"] == profile] steps = {r["step_name"] for r in profile_rows} - assert steps == {"decompose", "filter", "generate"} + assert steps == expected_steps conn.close() @@ -311,7 +315,7 @@ def test_seed_default_profiles_contains_expected_templates(tmp_path, monkeypatch assert "{question}" in filter_row["prompt_template"] assert "{chunks}" in filter_row["prompt_template"] - # Generate must have {context_sections} + # Generate must have {question} and {context} generate_row = conn.execute( """SELECT sp.prompt_template FROM system_prompts sp @@ -319,7 +323,50 @@ def test_seed_default_profiles_contains_expected_templates(tmp_path, monkeypatch WHERE spp.name='A' AND sp.step_name='generate'""" ).fetchone() assert generate_row is not None - assert "{context_sections}" in generate_row["prompt_template"] + assert "{question}" in generate_row["prompt_template"] + assert "{context}" in generate_row["prompt_template"] + + # Generate per-subq must have {context_sections} + gen_per_subq_row = conn.execute( + """SELECT sp.prompt_template + FROM system_prompts sp + JOIN system_prompt_profiles spp ON sp.profile_id = spp.id + WHERE spp.name='A' AND sp.step_name='generate_per_subq'""" + ).fetchone() + assert gen_per_subq_row is not None + assert "{context_sections}" in gen_per_subq_row["prompt_template"] + + # Filter intro exists (no required placeholders) + filter_intro_row = conn.execute( + """SELECT sp.prompt_template + FROM system_prompts sp + JOIN system_prompt_profiles spp ON sp.profile_id = spp.id + WHERE spp.name='A' AND sp.step_name='filter_intro'""" + ).fetchone() + assert filter_intro_row is not None + assert len(filter_intro_row["prompt_template"]) > 0 + + # Filter section must have per-subq placeholders + filter_section_row = conn.execute( + """SELECT sp.prompt_template + FROM system_prompts sp + JOIN system_prompt_profiles spp ON sp.profile_id = spp.id + WHERE spp.name='A' AND sp.step_name='filter_section'""" + ).fetchone() + assert filter_section_row is not None + assert "{subq_idx}" in filter_section_row["prompt_template"] + assert "{subq_question}" in filter_section_row["prompt_template"] + assert "{chunks}" in filter_section_row["prompt_template"] + + # Filter outro exists (no required placeholders) + filter_outro_row = conn.execute( + """SELECT sp.prompt_template + FROM system_prompts sp + JOIN system_prompt_profiles spp ON sp.profile_id = spp.id + WHERE spp.name='A' AND sp.step_name='filter_outro'""" + ).fetchone() + assert filter_outro_row is not None + assert len(filter_outro_row["prompt_template"]) > 0 conn.close() @@ -345,7 +392,7 @@ def test_seed_default_profiles_idempotent(tmp_path, monkeypatch): ).fetchone()[0] assert profile_count == 3 - assert prompt_count == 9 + assert prompt_count == 21 conn.close() @@ -361,7 +408,10 @@ def test_all_three_profiles_have_identical_seed_templates(tmp_path, monkeypatch) init_prompts_db(conn) seed_default_profiles(conn) - for step in ("decompose", "filter", "generate"): + for step in ( + "decompose", "filter", "generate", + "generate_per_subq", "filter_intro", "filter_section", "filter_outro", + ): rows = conn.execute( """SELECT spp.name, sp.prompt_template FROM system_prompts sp diff --git a/backend/app/test/test_phase4_prompt_templates.py b/backend/app/test/test_phase4_prompt_templates.py index 82356d9..13c5380 100644 --- a/backend/app/test/test_phase4_prompt_templates.py +++ b/backend/app/test/test_phase4_prompt_templates.py @@ -31,36 +31,43 @@ def prompt_service(): def test_generate_template_uses_context_sections(prompt_service): - """When PromptService returns the generate template, {context_sections} - placeholder must be present and old {context} removed.""" - template = prompt_service.get_prompt_template("generate") + """When PromptService returns the generate_per_subq template, + {context_sections} placeholder must be present.""" + template = prompt_service.get_prompt_template("generate_per_subq") assert "{context_sections}" in template - assert "{context}" not in template def test_builtin_generate_template_has_context_sections(): - """When no prompt_service, the built-in seed template uses - {context_sections} instead of {context}.""" - from app.core.sqlite_db import _SEED_GENERATE + """The per-subq seed template (_SEED_GENERATE_PER_SUBQ) uses + {context_sections}, while the flat generate uses {question}/{context}.""" + from app.core.sqlite_db import _SEED_GENERATE_PER_SUBQ, _SEED_GENERATE - assert "{context_sections}" in _SEED_GENERATE - assert "{context}" not in _SEED_GENERATE + assert "{context_sections}" in _SEED_GENERATE_PER_SUBQ + assert "{question}" in _SEED_GENERATE + assert "{context}" in _SEED_GENERATE def test_reset_to_defaults_includes_new_generate_template(prompt_service): - """After calling reset_to_defaults() on a profile, the generate step - uses {context_sections} placeholder.""" + """After calling reset_to_defaults(), generate_per_subq uses + {context_sections} and flat generate uses {question}/{context}.""" profile_name = prompt_service.get_active_profile_name() - prompt_service.update_prompt(profile_name, "generate", "custom template with {context}") + # Modify both templates + prompt_service.update_prompt(profile_name, "generate_per_subq", "custom per-subq") + prompt_service.update_prompt(profile_name, "generate", "custom flat") - modified = prompt_service.get_prompt_template("generate") - assert "{context}" in modified - assert "{context_sections}" not in modified + modified_per_subq = prompt_service.get_prompt_template("generate_per_subq") + assert "custom per-subq" == modified_per_subq + + modified_flat = prompt_service.get_prompt_template("generate") + assert "custom flat" == modified_flat prompt_service.reset_to_defaults(profile_name) - template = prompt_service.get_prompt_template("generate") - assert "{context_sections}" in template - assert "{context}" not in template + template_per_subq = prompt_service.get_prompt_template("generate_per_subq") + assert "{context_sections}" in template_per_subq + + template_flat = prompt_service.get_prompt_template("generate") + assert "{question}" in template_flat + assert "{context}" in template_flat diff --git a/frontend/src/components/PlaceholderDocs.tsx b/frontend/src/components/PlaceholderDocs.tsx index e51b28f..8ac5171 100644 --- a/frontend/src/components/PlaceholderDocs.tsx +++ b/frontend/src/components/PlaceholderDocs.tsx @@ -7,9 +7,12 @@ interface PlaceholderRow { } const PLACEHOLDERS: PlaceholderRow[] = [ - { placeholder: '{question}', description: 'The user\'s input question' }, + { placeholder: '{question}', description: "The user's input question" }, { placeholder: '{chunks}', description: 'Retrieved document chunks (filter step only)' }, { placeholder: '{context}', description: 'Formatted chunks with citations (generate step only)' }, + { placeholder: '{context_sections}', description: 'Per-sub-question context sections (generate per-subq only)' }, + { placeholder: '{subq_idx}', description: 'Sub-question index number (filter section only)' }, + { placeholder: '{subq_question}', description: 'Sub-question text (filter section only)' }, ] export const PlaceholderDocs: React.FC = () => { diff --git a/frontend/src/components/PromptEditor.tsx b/frontend/src/components/PromptEditor.tsx index 02b9c87..58ce81a 100644 --- a/frontend/src/components/PromptEditor.tsx +++ b/frontend/src/components/PromptEditor.tsx @@ -15,14 +15,20 @@ export interface PromptEditorProps { const STEPS = [ { key: 'decompose', label: 'Step 1: Query Decomposition', placeholders: ['question'] }, - { key: 'filter', label: 'Step 2: Relevance Filtering', placeholders: ['question', 'chunks'] }, - { key: 'generate', label: 'Step 3: Response Generation', placeholders: ['question', 'context'] }, + { key: 'filter_intro', label: 'Step 2.1: Filter Intro (Preamble)', placeholders: [] }, + { key: 'filter_section', label: 'Step 2.2: Filter Section (Per Sub-Q)', placeholders: ['subq_idx', 'subq_question', 'chunks'] }, + { key: 'filter_outro', label: 'Step 2.3: Filter Outro (Format)', placeholders: [] }, + { key: 'generate_per_subq', label: 'Step 3: Generate (Per-Sub-Question)', placeholders: ['context_sections'] }, ] as const const VALID_PLACEHOLDERS: Record = { decompose: ['{question}'], filter: ['{question}', '{chunks}'], generate: ['{question}', '{context}'], + generate_per_subq: ['{context_sections}'], + filter_intro: [], + filter_section: ['{subq_idx}', '{subq_question}', '{chunks}'], + filter_outro: [], } const findUnknownPlaceholders = (template: string, stepKey: string): string[] => { diff --git a/frontend/src/test/components/PlaceholderDocs.test.tsx b/frontend/src/test/components/PlaceholderDocs.test.tsx index 8b68ed5..c9967a5 100644 --- a/frontend/src/test/components/PlaceholderDocs.test.tsx +++ b/frontend/src/test/components/PlaceholderDocs.test.tsx @@ -8,16 +8,19 @@ describe('PlaceholderDocs', () => { expect(screen.getByText('Available Placeholders')).toBeInTheDocument() }) - it('renders all 3 placeholders: {question}, {chunks}, {context}', () => { + it('renders all 6 placeholders', () => { render() expect(screen.getByText('{question}')).toBeInTheDocument() expect(screen.getByText('{chunks}')).toBeInTheDocument() expect(screen.getByText('{context}')).toBeInTheDocument() + expect(screen.getByText('{context_sections}')).toBeInTheDocument() + expect(screen.getByText('{subq_idx}')).toBeInTheDocument() + expect(screen.getByText('{subq_question}')).toBeInTheDocument() }) it('renders each placeholder in a monospace element', () => { render() - const placeholders = ['{question}', '{chunks}', '{context}'] + const placeholders = ['{question}', '{chunks}', '{context}', '{context_sections}', '{subq_idx}', '{subq_question}'] placeholders.forEach((ph) => { const el = screen.getByText(ph) expect(el.tagName.toLowerCase()).toBe('code') @@ -29,6 +32,9 @@ describe('PlaceholderDocs', () => { expect(screen.getByText("The user's input question")).toBeInTheDocument() expect(screen.getByText('Retrieved document chunks (filter step only)')).toBeInTheDocument() expect(screen.getByText('Formatted chunks with citations (generate step only)')).toBeInTheDocument() + expect(screen.getByText('Per-sub-question context sections (generate per-subq only)')).toBeInTheDocument() + expect(screen.getByText('Sub-question index number (filter section only)')).toBeInTheDocument() + expect(screen.getByText('Sub-question text (filter section only)')).toBeInTheDocument() }) it('hides placeholder details when toggle button is clicked', () => { @@ -39,6 +45,7 @@ describe('PlaceholderDocs', () => { expect(screen.queryByText('{question}')).not.toBeInTheDocument() expect(screen.queryByText('{chunks}')).not.toBeInTheDocument() expect(screen.queryByText('{context}')).not.toBeInTheDocument() + expect(screen.queryByText('{context_sections}')).not.toBeInTheDocument() }) it('shows placeholder details again when toggle is clicked twice', () => { @@ -50,5 +57,6 @@ describe('PlaceholderDocs', () => { fireEvent.click(toggleBtn) expect(screen.getByText('{question}')).toBeInTheDocument() + expect(screen.getByText('{context_sections}')).toBeInTheDocument() }) }) diff --git a/frontend/src/test/components/PromptEditor.test.tsx b/frontend/src/test/components/PromptEditor.test.tsx index fda9bc6..aae2602 100644 --- a/frontend/src/test/components/PromptEditor.test.tsx +++ b/frontend/src/test/components/PromptEditor.test.tsx @@ -4,8 +4,10 @@ import { PromptEditor } from '../../components/PromptEditor' const mockPrompts: Record = { decompose: 'Decompose template with {question}', - filter: 'Filter template with {question} and {chunks}', - generate: 'Generate template with {question} and {context}', + filter_intro: 'Evaluate each chunk for relevance', + filter_section: 'Sub-question {subq_idx}: "{subq_question}"\n{chunks}', + filter_outro: 'Return JSON object mapping sub-question indices to scores.', + generate_per_subq: 'Generate per-subq with {context_sections}', } const defaultProps = { @@ -29,46 +31,54 @@ describe('PromptEditor', () => { defaultProps.onCancel.mockClear() }) - it('renders 3 textareas for decompose, filter, and generate steps', () => { + it('renders 5 textareas for the active steps', () => { render() const textareas = screen.getAllByRole('textbox') - expect(textareas).toHaveLength(3) + expect(textareas).toHaveLength(5) }) it('renders labels for each step', () => { render() expect(screen.getByText('Step 1: Query Decomposition')).toBeInTheDocument() - expect(screen.getByText('Step 2: Relevance Filtering')).toBeInTheDocument() - expect(screen.getByText('Step 3: Response Generation')).toBeInTheDocument() + expect(screen.getByText('Step 2.1: Filter Intro (Preamble)')).toBeInTheDocument() + expect(screen.getByText('Step 2.2: Filter Section (Per Sub-Q)')).toBeInTheDocument() + expect(screen.getByText('Step 2.3: Filter Outro (Format)')).toBeInTheDocument() + expect(screen.getByText('Step 3: Generate (Per-Sub-Question)')).toBeInTheDocument() }) it('shows valid placeholders in the label area for each step', () => { render() - // {question} appears in all 3 steps (decompose, filter, generate) - const questionPlaceholders = screen.getAllByText('{question}') - expect(questionPlaceholders).toHaveLength(3) - // {chunks} appears only in filter step + // {question} appears only in decompose step + expect(screen.getByText('{question}')).toBeInTheDocument() + // {chunks} appears only in filter_section expect(screen.getByText('{chunks}')).toBeInTheDocument() - // {context} appears only in generate step - expect(screen.getByText('{context}')).toBeInTheDocument() + // {context_sections} appears in generate_per_subq + expect(screen.getByText('{context_sections}')).toBeInTheDocument() + // {subq_idx}, {subq_question} appear in filter_section + expect(screen.getByText('{subq_idx}')).toBeInTheDocument() + expect(screen.getByText('{subq_question}')).toBeInTheDocument() + // {context} is no longer shown (flat generate removed) + expect(screen.queryByText('{context}')).not.toBeInTheDocument() }) it('renders per-step reset buttons that call onResetStep when clicked', () => { - // Mock window.confirm to return true const confirmSpy = vi.spyOn(window, 'confirm').mockReturnValue(true) render() const resetButtons = screen.getAllByTitle(/reset.*to default/i) - expect(resetButtons).toHaveLength(3) + expect(resetButtons).toHaveLength(5) fireEvent.click(resetButtons[0]) expect(defaultProps.onResetStep).toHaveBeenCalledWith('decompose') fireEvent.click(resetButtons[1]) - expect(defaultProps.onResetStep).toHaveBeenCalledWith('filter') + expect(defaultProps.onResetStep).toHaveBeenCalledWith('filter_intro') - fireEvent.click(resetButtons[2]) - expect(defaultProps.onResetStep).toHaveBeenCalledWith('generate') + fireEvent.click(resetButtons[3]) + expect(defaultProps.onResetStep).toHaveBeenCalledWith('filter_outro') + + fireEvent.click(resetButtons[4]) + expect(defaultProps.onResetStep).toHaveBeenCalledWith('generate_per_subq') confirmSpy.mockRestore() }) @@ -133,8 +143,10 @@ describe('PromptEditor', () => { const textareas = screen.getAllByRole('textbox') expect(textareas[0]).toHaveValue('Decompose template with {question}') - expect(textareas[1]).toHaveValue('Filter template with {question} and {chunks}') - expect(textareas[2]).toHaveValue('Generate template with {question} and {context}') + expect(textareas[1]).toHaveValue('Evaluate each chunk for relevance') + expect(textareas[2]).toHaveValue('Sub-question {subq_idx}: "{subq_question}"\n{chunks}') + expect(textareas[3]).toHaveValue('Return JSON object mapping sub-question indices to scores.') + expect(textareas[4]).toHaveValue('Generate per-subq with {context_sections}') }) it('calls onUpdate when typing in a textarea', () => { @@ -149,13 +161,11 @@ describe('PromptEditor', () => { it('displays character count for each textarea', () => { render() - const decomposeLength = mockPrompts.decompose.length - const filterLength = mockPrompts.filter.length - const generateLength = mockPrompts.generate.length - - expect(screen.getByText(`${decomposeLength} characters`)).toBeInTheDocument() - expect(screen.getByText(`${filterLength} characters`)).toBeInTheDocument() - expect(screen.getByText(`${generateLength} characters`)).toBeInTheDocument() + expect(screen.getByText(`${mockPrompts.decompose.length} characters`)).toBeInTheDocument() + expect(screen.getByText(`${mockPrompts.filter_intro.length} characters`)).toBeInTheDocument() + expect(screen.getByText(`${mockPrompts.filter_section.length} characters`)).toBeInTheDocument() + expect(screen.getByText(`${mockPrompts.filter_outro.length} characters`)).toBeInTheDocument() + expect(screen.getByText(`${mockPrompts.generate_per_subq.length} characters`)).toBeInTheDocument() }) it('shows the profile name being edited', () => { @@ -169,7 +179,6 @@ describe('PromptEditor', () => { const textareas = screen.getAllByRole('textbox') textareas.forEach((ta) => expect(ta).toBeDisabled()) - // "Reset All to Defaults" and "Cancel" should be disabled when saving expect(screen.getByRole('button', { name: /saving\.\.\./i })).toBeDisabled() expect(screen.getByRole('button', { name: /reset all to defaults/i })).toBeDisabled() expect(screen.getByRole('button', { name: /cancel/i })).toBeDisabled()