"""Query decomposer service.

This module provides a lightweight QueryDecomposer that delegates the
translation of a natural language question into a list of keyword search
terms to an LLM client.
"""

from __future__ import annotations

import json
import logging
import re
from typing import List

logger = logging.getLogger(__name__)

# First Markdown code fence in a response, optionally tagged ``json``.
# Compiled once at import time; DOTALL lets the payload span several lines.
_FENCED_BLOCK_RE = re.compile(r"```(?:json)?\s*\n?(.*?)\n?```", re.DOTALL)


def _extract_json_from_markdown(response: str) -> str:
    """Return the text inside the first Markdown code fence of *response*.

    Args:
        response: Raw text returned by the LLM.

    Returns:
        The stripped content of the first fenced block when one is present,
        otherwise the stripped response itself. A non-string *response* is
        returned as ``str(response)`` without further processing.
    """
    if not isinstance(response, str):
        return str(response)
    match = _FENCED_BLOCK_RE.search(response)
    if match:
        return match.group(1).strip()
    return response.strip()


class QueryDecomposer:
    """Decompose a natural language question into a list of keywords.

    The class expects an LLM client object exposing an awaitable
    ``complete(prompt, step_name=...)`` method; it is called with the prompt
    as the only positional argument and ``step_name="QueryDecomposer"`` as a
    keyword argument. ``decompose`` builds a prompt, asks the LLM for a JSON
    array of strings, and parses the reply into a Python list of strings.
    Every failure mode degrades to an empty list rather than an exception.
    """

    def __init__(self, llm_client) -> None:
        """Store the LLM client used by ``decompose``."""
        self.llm_client = llm_client

    async def decompose(self, question: str) -> List[str]:
        """Return a list of keywords extracted for the given question.

        Args:
            question: The natural language question to decompose.

        Returns:
            A list of keyword strings. Array items that are not strings are
            converted with ``str()``. An empty list is returned when the
            question is ``None``, not a string, or blank; when the LLM call
            raises; or when the reply is not a JSON array.
        """
        # Reject None, non-string and blank input before spending an LLM
        # call. The isinstance check also prevents an AttributeError from
        # ``.strip()`` on unexpected input types.
        if not isinstance(question, str) or not question.strip():
            return []

        prompt = (
            f"Given question: '{question}', "
            "extract key search keywords as JSON array"
        )

        try:
            response = await self.llm_client.complete(
                prompt, step_name="QueryDecomposer"
            )
        except Exception as exc:
            # Deliberate best-effort boundary: any client failure degrades
            # to "no keywords" instead of propagating to the caller.
            logger.warning("LLM decomposition failed: %s", exc)
            return []

        # The helper also coerces a non-string response to ``str``.
        payload = _extract_json_from_markdown(response)

        try:
            data = json.loads(payload)
        except json.JSONDecodeError as exc:
            logger.warning("LLM response is not valid JSON: %s", exc)
            return []

        if not isinstance(data, list):
            logger.warning(
                "LLM response is not a JSON array (got %s)",
                type(data).__name__,
            )
            return []

        # An empty array falls through to an empty result.
        return [item if isinstance(item, str) else str(item) for item in data]