# legco_ai_assistant/backend/app/test/conftest.py

"""Shared pytest fixtures for backend tests.

All external LLM/ASR calls must be mocked. Use tmp_path for ChromaDB instances.
"""
import pytest
from unittest.mock import AsyncMock, MagicMock


@pytest.fixture
def mock_llm_client(monkeypatch):
    """Provide a stand-in LLM client so tests never call a live API.

    The returned object exposes a single coroutine, ``complete``, which
    ignores its arguments and always resolves to the same canned JSON
    string. ``monkeypatch`` is requested but not used by this fixture.
    """
    canned_reply = '{"choices": [{"message": {"content": "mock response"}}]}'

    class _StubLLMClient:
        """Client stub whose ``complete`` coroutine returns a fixed payload."""

        async def complete(self, prompt: str, temperature: float = 0.7) -> str:  # type: ignore
            # Arguments are accepted for signature compatibility only.
            return canned_reply

    stub = _StubLLMClient()
    return stub


@pytest.fixture
def mock_asr_client(monkeypatch):
    """Provide a stand-in ASR client so tests never call a live API.

    The returned object exposes a single coroutine, ``transcribe``, which
    ignores the audio it is given and always resolves to an empty string.
    ``monkeypatch`` is requested but not used by this fixture.
    """
    empty_transcript = ""

    class _StubASRClient:
        """Client stub whose ``transcribe`` coroutine yields an empty transcript."""

        async def transcribe(self, audio_bytes):  # type: ignore
            # The audio payload is never inspected.
            return empty_transcript

    stub = _StubASRClient()
    return stub


@pytest.fixture
def chroma_test_dir(tmp_path):
    """Return a per-test path under ``tmp_path`` for an isolated ChromaDB store.

    Only the path is built here; the directory itself is not created.
    """
    return tmp_path.joinpath("chroma_test")


@pytest.fixture
def mock_prompt_service():
    """Provide an in-memory PromptService stand-in (no prompts.db needed).

    The stub serves a fixed set of seed templates keyed by pipeline step,
    so tests that inspect prompt text can run without a real database.
    Read methods return canned data; every mutating method is a no-op.
    """
    # Profile reported as active; the stub never changes it.
    active_profile = "A"
    profile_names = ("A", "B", "C")
    # Returned for any step that has no seed template.
    fallback_template = "Template for {question}"

    seed_templates = {
        "decompose": (
            "Given this question: '{question}'\n\n"
            "Break it down into 2-5 simplified sub-questions that would help "
            "search for relevant information. Each sub-question should be short "
            "and focused on one aspect. Return as a JSON array of strings."
        ),
        "filter": (
            "Given question '{question}' and these document chunks, rate each 0-10 for relevance. "
            "Return JSON array of scores.\n{chunks}\n"
        ),
        "generate": (
            "Question: {question}\n\n"
            "Answer the question using ONLY these document chunks. "
            "Do not use any external knowledge. "
            "Format your answer as bullet points. "
            "Cite your sources inline using the exact bracket labels provided, "
            "e.g. [filename, page N]. Place the citation at the end of each relevant point.\n\n"
            "Document chunks:\n{context}\n\n"
            "Answer:"
        ),
        "generate_per_subq": (
            "Answer each sub-question using ONLY its document chunks.\n"
            "Format as markdown sections with ## Sub-question N: headers.\n"
            "{context_sections}\n\n"
            "Answer:"
        ),
        "filter_intro": "Evaluate each chunk for relevance to its associated sub-question only.",
        "filter_section": (
            '\nSub-question {subq_idx}: "{subq_question}"\n{chunks}'
        ),
        "filter_outro": (
            "\nFor each chunk, rate its relevance 0-10 considering ONLY its associated sub-question.\n"
            'Return a JSON object mapping sub-question indices to arrays of scores.\n'
            'Example: {"0": [8.5, 3.2, 9.0], "1": [7.0, 9.1]}'
        ),
    }

    class _MockPromptService:
        """Stub exposing the PromptService methods used by the tests."""

        # --- read-only queries -------------------------------------------

        def get_active_profile_name(self) -> str:
            """Return the name of the (fixed) active profile."""
            return active_profile

        def get_prompt_template(self, step: str) -> str:
            """Return the seed template for ``step``, or a generic fallback."""
            if step in seed_templates:
                return seed_templates[step]
            return fallback_template

        def list_profiles(self) -> list[dict]:
            """Return a fresh list describing profiles A, B and C."""
            return [
                {"name": profile, "is_active": profile == active_profile}
                for profile in profile_names
            ]

        def get_profile_prompts(self, name: str) -> dict:
            """Return a shallow copy of the seed templates for any profile."""
            return dict(seed_templates)

        # --- mutators: accepted but deliberately ignored -------------------

        def activate_profile(self, name: str) -> None:
            """Accept the call without changing any state."""
            return None

        def update_prompt(self, name: str, step: str, template: str) -> None:
            """Accept the call without changing any state."""
            return None

        def update_all_prompts(self, name: str, prompts: dict[str, str]) -> None:
            """Accept the call without changing any state."""
            return None

        def reset_to_defaults(self, name: str, step: str | None = None) -> None:
            """Accept the call without changing any state."""
            return None

    service = _MockPromptService()
    return service