# legco_ai_assistant/backend/app/test/test_phase3_prompt_injectio...

"""Tests for Phase 3.4: Prompt template injection into LLM services.

Verifies that QueryDecomposer, RelevanceFilter, and RAGService
correctly fetch templates from PromptService and substitute placeholders.

Uses real PromptService (SQLite via tmp_path), real ChromaDB (tmp_path),
and only mocks the external LLM API.
"""
import sqlite3

import chromadb
import pytest

from app.core.sqlite_db import init_prompts_db, seed_default_profiles
from app.services.prompt_service import PromptService


# ── helpers ──────────────────────────────────────────────────────────────


class _MockLLM:
    """Mock external LLM API — only external dependency we're allowed to mock."""

    def __init__(self, response: str = '["sub-q"]', side_effect: Exception | None = None):
        self._response = response
        self._side_effect = side_effect
        self.last_prompt: str | None = None
        self.calls: list[dict] = []
        self._call_count: int = 0

    async def complete(
        self, prompt: str, temperature: float = 0.7, step_name: str = "LLM"
    ) -> str:
        self.calls.append({"prompt": prompt, "step": step_name})
        self.last_prompt = prompt
        self._call_count += 1
        if self._side_effect:
            raise self._side_effect
        return self._response

    @property
    def call_count(self) -> int:
        return self._call_count

    def assert_called(self):
        assert self._call_count > 0, "LLM.complete was not called"

    def assert_not_called(self):
        assert self._call_count == 0, f"LLM.complete was called {self._call_count} time(s)"


def _create_prompt_service(
    tmp_path, custom_templates: dict[str, str] | None = None
) -> PromptService:
    """Create a real PromptService backed by real SQLite in tmp_path.

    The database file ``prompts.db`` is created under *tmp_path*, initialised
    with ``init_prompts_db`` and populated with ``seed_default_profiles``
    (the default A/B/C profiles). When *custom_templates* is given, each
    ``step -> template`` pair is written to profile "A" via
    ``PromptService.update_prompt`` so the service returns controlled
    templates.

    Args:
        tmp_path: Directory that will hold the SQLite file (pytest's
            ``tmp_path`` fixture in this module).
        custom_templates: Optional mapping of step name to template text.

    Returns:
        A PromptService bound to the freshly seeded database.
    """
    db_path = str(tmp_path / "prompts.db")
    conn = sqlite3.connect(db_path)
    try:
        conn.row_factory = sqlite3.Row
        conn.execute("PRAGMA foreign_keys=ON")
        init_prompts_db(conn)
        seed_default_profiles(conn)
    finally:
        # Release the handle even when schema setup or seeding raises, so a
        # failing test does not leave the database file open.
        conn.close()

    svc = PromptService(db_path=db_path)
    for step, template in (custom_templates or {}).items():
        svc.update_prompt("A", step, template)
    return svc


def _setup_chroma(tmp_path):
    """Return a real ChromaDB PersistentClient stored under ``tmp_path / "chroma"``.

    The directory is created if it does not exist yet, so each test that
    passes its own *tmp_path* gets its own on-disk store.
    """
    storage_dir = tmp_path / "chroma"
    storage_dir.mkdir(parents=True, exist_ok=True)
    client = chromadb.PersistentClient(path=str(storage_dir))
    return client


# ── QueryDecomposer tests ───────────────────────────────────────────────


async def test_decomposer_fetches_template_from_prompt_service(tmp_path):
    """QueryDecomposer should use the template returned by PromptService.

    A custom ``decompose`` template is stored for profile "A". The prompt
    sent to the LLM must start with that template's prefix, contain the
    question text, and equal the prompt returned by ``decompose``.
    """
    from app.services.query_decomposer import QueryDecomposer

    custom_template = "CUSTOM DECOMPOSE: {question} -> split"
    ps = _create_prompt_service(tmp_path, {"decompose": custom_template})
    llm = _MockLLM('["a"]')

    d = QueryDecomposer(llm, prompt_service=ps)
    _, returned_prompt = await d.decompose("What is X?")

    # last_prompt stays None until the LLM is called; check that first so a
    # skipped call fails with a clear message instead of an AttributeError.
    llm.assert_called()
    sent_prompt = llm.last_prompt
    assert sent_prompt.startswith("CUSTOM DECOMPOSE:")
    assert "What is X?" in sent_prompt
    assert returned_prompt == sent_prompt


async def test_decomposer_uses_builtin_when_no_prompt_service():
    """Without prompt_service, the built-in seed template is used.

    The built-in template is recognised by a fixed phrase; the question must
    appear in the prompt and the returned prompt must match what was sent.
    """
    from app.services.query_decomposer import QueryDecomposer

    llm = _MockLLM('["a"]')
    d = QueryDecomposer(llm, prompt_service=None)
    _, returned_prompt = await d.decompose("What is X?")

    # last_prompt stays None until the LLM is called; check that first so a
    # skipped call fails with a clear message instead of a TypeError.
    llm.assert_called()
    sent_prompt = llm.last_prompt
    assert "Break it down into 2-5 simplified sub-questions" in sent_prompt
    assert "What is X?" in sent_prompt
    assert returned_prompt == sent_prompt


# ── RelevanceFilter tests ───────────────────────────────────────────────


async def test_filter_fetches_template_from_prompt_service(tmp_path):
    """RelevanceFilter should use the template from PromptService.

    A custom ``filter`` template is stored for profile "A". The prompt sent
    to the LLM must start with the template prefix, contain both the
    question and the chunk text, and equal the prompt returned by ``filter``.
    """
    from app.services.relevance_filter import RelevanceFilter

    custom_template = "FILTER: q={question} chunks={chunks}"
    ps = _create_prompt_service(tmp_path, {"filter": custom_template})
    llm = _MockLLM("[5.0]")

    rf = RelevanceFilter(llm, prompt_service=ps)
    chunks = [("text A", {"filename": "a.pdf"})]
    _, returned_prompt = await rf.filter("My question", chunks, threshold=3.0)

    # last_prompt stays None until the LLM is called; check that first so a
    # skipped call fails with a clear message instead of an AttributeError.
    llm.assert_called()
    sent_prompt = llm.last_prompt
    assert sent_prompt.startswith("FILTER:")
    assert "My question" in sent_prompt
    assert "text A" in sent_prompt
    assert returned_prompt == sent_prompt


async def test_filter_uses_builtin_when_no_prompt_service():
    """Without prompt_service, the built-in filter template is used.

    The built-in template is recognised by a fixed phrase; the question must
    appear in the prompt and the returned prompt must match what was sent.
    """
    from app.services.relevance_filter import RelevanceFilter

    llm = _MockLLM("[5.0]")
    rf = RelevanceFilter(llm, prompt_service=None)
    chunks = [("text A", {"filename": "a.pdf"})]
    _, returned_prompt = await rf.filter("My question", chunks, threshold=3.0)

    # last_prompt stays None until the LLM is called; check that first so a
    # skipped call fails with a clear message instead of a TypeError.
    llm.assert_called()
    sent_prompt = llm.last_prompt
    assert "rate each 0-10 for relevance" in sent_prompt
    assert "My question" in sent_prompt
    # Same contract the other template tests in this module verify.
    assert returned_prompt == sent_prompt


# ── RAGService generate tests ───────────────────────────────────────────


async def test_generate_fetches_template_from_prompt_service(tmp_path):
    """RAGService.generate_response should use PromptService template.

    A custom ``generate`` template is stored for profile "A". The prompt sent
    to the LLM must start and end with the template's literal text, contain
    the question and the chunk text, and equal the returned prompt.
    """
    from app.services.rag import RAGService

    custom_template = "GEN: {question} --- {context} END"
    ps = _create_prompt_service(tmp_path, {"generate": custom_template})
    llm = _MockLLM("answer")
    client = _setup_chroma(tmp_path)

    svc = RAGService(chroma_client=client, llm_client=llm, prompt_service=ps)
    _, gen_prompt = await svc.generate_response(
        "What is X?",
        ["chunk data"],
        [{"filename": "f.txt", "content_summary": "sum"}],
    )

    # last_prompt stays None until the LLM is called; check that first so a
    # skipped call fails with a clear message instead of an AttributeError.
    llm.assert_called()
    sent_prompt = llm.last_prompt
    assert sent_prompt.startswith("GEN:")
    assert "What is X?" in sent_prompt
    assert "chunk data" in sent_prompt
    assert sent_prompt.endswith("END")
    assert gen_prompt == sent_prompt


async def test_generate_uses_builtin_when_no_prompt_service(tmp_path):
    """Without prompt_service, the built-in generate template is used.

    Only the question's presence and the sent/returned prompt equality are
    checked; no phrase of the built-in template is pinned here.
    """
    from app.services.rag import RAGService

    llm = _MockLLM("answer")
    client = _setup_chroma(tmp_path)

    svc = RAGService(chroma_client=client, llm_client=llm, prompt_service=None)
    _, gen_prompt = await svc.generate_response(
        "What is X?",
        ["chunk data"],
        [{"filename": "f.txt", "content_summary": "sum"}],
    )

    # last_prompt stays None until the LLM is called; check that first so a
    # skipped call fails with a clear message instead of a TypeError.
    llm.assert_called()
    sent_prompt = llm.last_prompt
    assert "What is X?" in sent_prompt
    assert gen_prompt == sent_prompt


# ── Placeholder substitution safety tests ───────────────────────────────


async def test_placeholder_substitution_safe_with_curly_braces(tmp_path):
    """User text containing curly braces must survive placeholder substitution.

    Brace-delimited text in the question would break ``str.format``-style
    templating; the question must reach the LLM prompt verbatim and the call
    must still return a list of sub-questions.
    """
    from app.services.query_decomposer import QueryDecomposer

    ps = _create_prompt_service(tmp_path)
    llm = _MockLLM('["a"]')

    d = QueryDecomposer(llm, prompt_service=ps)
    result, returned_prompt = await d.decompose("What about {key: value}?")
    assert isinstance(result, list)

    # last_prompt stays None until the LLM is called; check that first so a
    # skipped call fails with a clear message instead of a TypeError.
    llm.assert_called()
    sent_prompt = llm.last_prompt
    assert "{key: value}" in sent_prompt
    assert returned_prompt == sent_prompt


async def test_unknown_placeholder_left_untouched(tmp_path):
    """Placeholders the service does not substitute stay as-is in the prompt.

    The custom template mixes an unknown ``{fake_var}`` with the known
    ``{question}``: the former must appear literally, the latter must be
    replaced by the question text.
    """
    from app.services.query_decomposer import QueryDecomposer

    ps = _create_prompt_service(
        tmp_path, {"decompose": "HELLO {fake_var} and {question}"}
    )
    llm = _MockLLM('["a"]')

    d = QueryDecomposer(llm, prompt_service=ps)
    _, returned_prompt = await d.decompose("Q?")

    # last_prompt stays None until the LLM is called; check that first so a
    # skipped call fails with a clear message instead of a TypeError.
    llm.assert_called()
    sent_prompt = llm.last_prompt
    assert "{fake_var}" in sent_prompt
    assert "Q?" in sent_prompt
    # Same contract the other decomposer tests in this module verify.
    assert returned_prompt == sent_prompt


async def test_empty_template_produces_empty_prompt(tmp_path):
    """An empty template string results in an empty prompt.

    The LLM must still be called, and the prompt it receives (and the one
    ``decompose`` returns) must be the empty string.
    """
    from app.services.query_decomposer import QueryDecomposer

    ps = _create_prompt_service(tmp_path, {"decompose": ""})
    llm = _MockLLM('["a"]')

    d = QueryDecomposer(llm, prompt_service=ps)
    _, returned_prompt = await d.decompose("Doesn't matter")

    # Separate "LLM received an empty prompt" from "LLM was never called",
    # which leaves last_prompt as None.
    llm.assert_called()
    sent_prompt = llm.last_prompt
    # Empty template with .replace("{question}", ...) still has no text
    assert sent_prompt == ""
    # Same contract the other decomposer tests in this module verify.
    assert returned_prompt == sent_prompt


# ── Edge case: no question / no chunks ──────────────────────────────────


async def test_decomposer_no_question_returns_empty(tmp_path):
    """An empty question short-circuits: no sub-questions, no prompt, no LLM call."""
    from app.services.query_decomposer import QueryDecomposer

    prompt_service = _create_prompt_service(tmp_path)
    mock_llm = _MockLLM('["should_not_see"]')
    decomposer = QueryDecomposer(mock_llm, prompt_service=prompt_service)

    sub_questions, prompt_used = await decomposer.decompose("")

    assert sub_questions == []
    assert prompt_used == ""
    mock_llm.assert_not_called()


async def test_filter_empty_chunks_no_template_fetch(tmp_path):
    """With no chunks to rate, filter yields [] and "" and never calls the LLM."""
    from app.services.relevance_filter import RelevanceFilter

    prompt_service = _create_prompt_service(tmp_path)
    mock_llm = _MockLLM("[5]")
    relevance_filter = RelevanceFilter(mock_llm, prompt_service=prompt_service)

    no_chunks = []
    kept, prompt_used = await relevance_filter.filter("Q?", no_chunks, threshold=5.0)

    assert kept == []
    assert prompt_used == ""
    mock_llm.assert_not_called()


async def test_generate_no_chunks_returns_fallback(tmp_path):
    """With no chunks, generate_response returns its fallback text and skips the LLM."""
    from app.services.rag import RAGService

    prompt_service = _create_prompt_service(tmp_path)
    mock_llm = _MockLLM("answer")
    chroma = _setup_chroma(tmp_path)
    rag = RAGService(
        chroma_client=chroma, llm_client=mock_llm, prompt_service=prompt_service
    )

    reply, prompt_used = await rag.generate_response("Q?", [], [])

    # The fallback wording is matched case-insensitively.
    lowered_reply = reply.lower()
    assert "could not find" in lowered_reply
    assert prompt_used == ""
    mock_llm.assert_not_called()


async def test_generate_per_subq_fetches_template_from_prompt_service(tmp_path):
    """RAGService.generate_response_per_subquestion should use PromptService template.

    A custom ``generate_per_subq`` template is stored for profile "A". The
    prompt sent to the LLM must start and end with the template's literal
    text, contain the chunk text, and equal the returned prompt; one
    sub-question must produce one group of sources.
    """
    from app.services.rag import RAGService

    custom_template = "PER_SUBQ: {context_sections} DONE"
    ps = _create_prompt_service(tmp_path, {"generate_per_subq": custom_template})
    llm = _MockLLM("answer")
    client = _setup_chroma(tmp_path)

    svc = RAGService(chroma_client=client, llm_client=llm, prompt_service=ps)
    _, gen_prompt, grouped_sources = await svc.generate_response_per_subquestion(
        ["What is X?"],
        [["chunk data"]],
        [[{"filename": "f.txt", "content_summary": "sum"}]],
    )

    # last_prompt stays None until the LLM is called; check that first so a
    # skipped call fails with a clear message instead of an AttributeError.
    llm.assert_called()
    sent_prompt = llm.last_prompt
    assert sent_prompt.startswith("PER_SUBQ:")
    assert "chunk data" in sent_prompt
    assert sent_prompt.endswith("DONE")
    assert gen_prompt == sent_prompt
    assert len(grouped_sources) == 1


async def test_generate_per_subq_uses_builtin_when_no_prompt_service(tmp_path):
    """Without prompt_service, the built-in per-subq template is used.

    The built-in template is recognised by the "Sub-question" label; the
    chunk text must be present, the ``{context_sections}`` placeholder must
    have been substituted, and the returned prompt must match what was sent.
    """
    from app.services.rag import RAGService

    llm = _MockLLM("answer")
    client = _setup_chroma(tmp_path)

    svc = RAGService(chroma_client=client, llm_client=llm, prompt_service=None)
    _, gen_prompt, _ = await svc.generate_response_per_subquestion(
        ["What is X?"],
        [["chunk data"]],
        [[{"filename": "f.txt", "content_summary": "sum"}]],
    )

    # last_prompt stays None until the LLM is called; check that first so a
    # skipped call fails with a clear message instead of a TypeError.
    llm.assert_called()
    sent_prompt = llm.last_prompt
    assert "Sub-question" in sent_prompt
    assert "chunk data" in sent_prompt
    assert "{context_sections}" not in sent_prompt
    # Same contract the other generate tests in this module verify.
    assert gen_prompt == sent_prompt