66 lines
2.1 KiB
Python
66 lines
2.1 KiB
Python
"""Query decomposer service.

This module provides a lightweight QueryDecomposer that delegates the
translation of a natural language question into a list of keyword search
terms to an LLM client. The interface is intentionally minimal to support
test-driven development for Phase 1.3.
"""
|
||
from __future__ import annotations
|
||
|
||
import json
|
||
from typing import List
|
||
|
||
|
||
class QueryDecomposer:
    """Decompose a natural language question into a list of keywords.

    The class expects an object that exposes a ``complete(prompt)`` method
    (an LLM client). ``decompose`` builds a prompt, asks the client for a
    JSON array, and turns the reply into a list of strings. Every failure
    mode (empty input, client error, unparseable reply, non-array JSON)
    results in an empty list rather than an exception.
    """

    def __init__(self, llm_client) -> None:
        """Store the client whose ``complete`` method will be called."""
        self.llm_client = llm_client

    @staticmethod
    def _parse_json(text: str):
        """Parse *text* as JSON, tolerating wrappers around a JSON array.

        The whole text is tried first. If that fails, the span from the
        first ``[`` to the last ``]`` is tried, which recovers arrays wrapped
        in a markdown code fence or preceded/followed by prose. Returns
        ``None`` when nothing parses.
        """
        try:
            return json.loads(text)
        except json.JSONDecodeError:
            pass

        start = text.find("[")
        end = text.rfind("]")
        if start == -1 or end <= start:
            return None
        try:
            return json.loads(text[start:end + 1])
        except json.JSONDecodeError:
            return None

    def decompose(self, question: str) -> List[str]:
        """Return a list of keywords extracted for the given question.

        Args:
            question: The natural language question to decompose.

        Returns:
            A list of keyword strings. Non-string array items are coerced
            with ``str()``. An empty list is returned when the question is
            not a string or is blank, when the client raises, or when the
            reply does not contain a JSON array.
        """
        # Guard before calling the client: a blank or non-string question
        # has nothing to decompose (and non-strings have no ``strip``).
        if not isinstance(question, str) or not question.strip():
            return []

        prompt = f"Given question: '{question}', extract key search keywords as JSON array"

        try:
            response = self.llm_client.complete(prompt)
        except Exception:
            # Deliberate best-effort: any client failure means "no keywords".
            return []

        if isinstance(response, (bytes, bytearray)):
            # str(b"...") would yield "b'...'", which is never valid JSON,
            # so decode byte replies instead of stringifying them.
            response = response.decode("utf-8", errors="replace")
        elif not isinstance(response, str):
            response = str(response)

        data = self._parse_json(response)
        if not isinstance(data, list):
            return []

        # Strings pass through unchanged; anything else is coerced to str.
        return [item if isinstance(item, str) else str(item) for item in data]
|