85 lines
2.6 KiB
Python
85 lines
2.6 KiB
Python
"""Query decomposer service.

This module provides a lightweight QueryDecomposer that delegates the
decomposition of a natural language question into simplified sub-questions
to an LLM client.
"""
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import logging
|
|
import re
|
|
from typing import List
|
|
|
|
|
|
# Module-level logger, named after this module via ``__name__``.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
def _extract_json_from_markdown(response: str) -> str:
|
|
if not isinstance(response, str):
|
|
return str(response)
|
|
pattern = r"```(?:json)?\s*\n?(.*?)\n?```"
|
|
match = re.search(pattern, response, re.DOTALL)
|
|
if match:
|
|
return match.group(1).strip()
|
|
return response.strip()
|
|
|
|
|
|
class QueryDecomposer:
    """Decompose a natural language question into simplified sub-questions.

    The class expects an LLM client object exposing an awaitable
    ``complete(prompt, step_name=...)`` method. ``decompose`` builds a
    prompt, asks the LLM to return a JSON array of sub-question strings, and
    parses that JSON into a Python list of strings. Invalid input, LLM
    failures and malformed responses all yield an empty list.
    """

    def __init__(self, llm_client) -> None:
        """Store the LLM client used to perform the decomposition.

        Args:
            llm_client: Object whose ``complete`` coroutine is awaited by
                ``decompose``.
        """
        self.llm_client = llm_client

    async def decompose(self, question: str) -> List[str]:
        """Return a list of sub-questions extracted for the given question.

        Args:
            question: The natural language question to decompose.

        Returns:
            A list of sub-question strings. An empty list is returned when
            the input is not a non-blank string, when the LLM call raises,
            or when the response is not a JSON array.
        """
        # Guard against None, non-string and blank input before calling the
        # LLM at all.
        if not isinstance(question, str) or not question.strip():
            return []

        prompt = (
            f"Given this question: '{question}'\n\n"
            "Break it down into 2-5 simplified sub-questions that would help "
            "search for relevant information. Each sub-question should be short "
            "and focused on one aspect. Return as a JSON array of strings."
        )

        # Best-effort boundary: any client failure degrades to "no
        # sub-questions" instead of propagating to the caller.
        try:
            response = await self.llm_client.complete(
                prompt, step_name="QueryDecomposer"
            )
        except Exception as exc:
            logger.warning("LLM decomposition failed: %s", exc)
            return []

        # The model may wrap its answer in a Markdown code fence; unwrap it
        # before parsing.
        payload = _extract_json_from_markdown(
            response if isinstance(response, str) else str(response)
        )

        try:
            data = json.loads(payload)
        except json.JSONDecodeError as exc:
            logger.warning("LLM decomposition returned invalid JSON: %s", exc)
            return []

        if not isinstance(data, list):
            logger.warning(
                "LLM decomposition returned %s instead of a JSON array",
                type(data).__name__,
            )
            return []

        # Non-string items (numbers, nested values) are stringified so the
        # declared return type always holds; an empty array yields [].
        return [item if isinstance(item, str) else str(item) for item in data]
|