"""Query decomposer service. This module provides a lightweight QueryDecomposer that delegates the decomposition of a natural language question into simplified sub-questions to an LLM client. Prompt templates are fetched from PromptService when available; otherwise, a built-in default is used. """ from __future__ import annotations import json import logging import re from typing import TYPE_CHECKING, List, Tuple if TYPE_CHECKING: from app.services.prompt_service import PromptService logger = logging.getLogger(__name__) # Fallback template used when prompt_service is not provided (tests, standalone). _BUILTIN_DECOMPOSE_TEMPLATE = ( "Given this question: '{question}'\n\n" "Break it down into 2-5 simplified sub-questions that would help " "search for relevant information. Each sub-question should be short " "and focused on one aspect. Return as a JSON array of strings." ) def _extract_json_from_markdown(response: str) -> str: if not isinstance(response, str): return str(response) pattern = r"```(?:json)?\s*\n?(.*?)\n?```" match = re.search(pattern, response, re.DOTALL) if match: return match.group(1).strip() return response.strip() class QueryDecomposer: """Decompose a natural language question into simplified sub-questions. The class expects an LLM client that exposes ``async complete(prompt: str) -> str`` and an optional ``PromptService`` for templated prompts. When ``prompt_service`` is ``None``, a built-in default template is used. """ def __init__(self, llm_client, prompt_service: "PromptService | None" = None) -> None: self.llm_client = llm_client self._prompt_service = prompt_service async def decompose(self, question: str) -> Tuple[List[str], str]: """Return a list of sub-questions and the prompt used for decomposition. Args: question: The natural language question to decompose. Returns: A tuple of (sub-questions, prompt). sub-questions is a list of strings; prompt is the rendered prompt string. If the LLM response is invalid or the input is empty, sub-questions will be an empty list and prompt will be ``""`` or the prompt that was attempted. """ if question is None or question.strip() == "": return [], "" if self._prompt_service is not None: template = self._prompt_service.get_prompt_template("decompose") else: template = _BUILTIN_DECOMPOSE_TEMPLATE prompt = template.replace("{question}", question) try: response = await self.llm_client.complete(prompt, step_name="QueryDecomposer") except Exception as exc: logger.warning("LLM decomposition failed: %s", exc) return [], prompt if not isinstance(response, str): response = str(response) response = _extract_json_from_markdown(response) try: data = json.loads(response) except json.JSONDecodeError: return [], prompt if not isinstance(data, list): return [], prompt if len(data) == 0: return [], prompt if all(isinstance(item, str) for item in data): return data, prompt return [str(item) for item in data], prompt