legco_ai_assistant/backend/app/services/query_decomposer.py

66 lines
2.1 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""Query decomposer service.
This module provides a lightweight QueryDecomposer that delegates the
translation of a natural language question into a list of keyword search
terms to an LLM client. The interface is intentionally minimal to support
test-driven development for Phase 1.3.
"""
from __future__ import annotations
import json
from typing import List
class QueryDecomposer:
    """Decompose a natural language question into a list of search keywords.

    The work is delegated to an LLM client: any object that exposes a
    ``complete(prompt: str) -> str`` method. ``decompose`` builds a prompt,
    asks the client for a JSON array of strings and parses the reply into a
    Python list. Unusable input, exceptions raised by the client and replies
    that are not a JSON array all result in an empty list.
    """

    def __init__(self, llm_client) -> None:
        """Store the LLM client used by ``decompose``.

        Args:
            llm_client: Object providing ``complete(prompt)``.
        """
        self.llm_client = llm_client

    @staticmethod
    def _strip_code_fence(text: str) -> str:
        """Return *text* without a surrounding Markdown code fence, if any.

        LLMs frequently wrap JSON in a fenced block, optionally tagged with a
        language name on the opening line. Text that is not wrapped in a
        fence is returned unchanged.
        """
        stripped = text.strip()
        fenced = stripped.startswith("```") and stripped.endswith("```")
        # Fewer than six characters cannot hold both an opening and a
        # closing fence.
        if not fenced or len(stripped) < 6:
            return text

        inner = stripped[3:-3]
        first_line, newline, rest = inner.partition("\n")
        tag = first_line.strip()
        # Drop the opening line only when it is empty or looks like a
        # language tag (e.g. "json"); otherwise it is already payload.
        if newline and (not tag or tag.isalnum()):
            return rest
        return inner

    def decompose(self, question: str) -> List[str]:
        """Return the keywords the LLM extracts for *question*.

        Args:
            question: The natural language question to decompose.

        Returns:
            A list of keyword strings. Non-string items in the LLM's JSON
            array are converted with ``str``. An empty list is returned when
            *question* is not a string or is blank, when the client raises,
            or when the reply is not a JSON array.
        """
        # Guard the type as well as the content: calling ``.strip()`` on a
        # non-string would raise instead of degrading gracefully.
        if not isinstance(question, str) or not question.strip():
            return []

        prompt = f"Given question: '{question}', extract key search keywords as JSON array"
        try:
            response = self.llm_client.complete(prompt)
        except Exception:
            # Deliberate best-effort: any client failure means "no keywords".
            return []

        if not isinstance(response, str):
            response = str(response)

        try:
            data = json.loads(self._strip_code_fence(response))
        except json.JSONDecodeError:
            # Invalid JSON: treat as no keywords.
            return []

        if not isinstance(data, list):
            return []

        # ``str`` leaves strings untouched and coerces everything else, so a
        # single pass covers the empty, all-string and mixed cases alike.
        return [str(item) for item in data]