feat(backend): rename keywords to extracted_questions in query pipeline (sub-phase 2.3)
Change the QueryDecomposer prompt to generate 2-5 sub-questions instead of keywords. Rename the API field from `keywords` to `extracted_questions` across the models, service, and router.
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)
Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
parent
ecaa9ebb26
commit
f9dda7bd18
|
|
@ -10,6 +10,6 @@ class QueryRequest(BaseModel):
|
|||
|
||||
|
||||
class QueryResponse(BaseModel):
|
||||
keywords: List[str]
|
||||
extracted_questions: List[str]
|
||||
answer: str
|
||||
sources: List[SourceMetadata]
|
||||
|
|
|
|||
|
|
@ -30,13 +30,13 @@ async def query(request: QueryRequest):
|
|||
|
||||
logger.info("Query: %s", request.question)
|
||||
decomposer = QueryDecomposer(llm_client)
|
||||
keywords = await decomposer.decompose(request.question)
|
||||
logger.info("Keywords: %s", keywords)
|
||||
extracted_questions = await decomposer.decompose(request.question)
|
||||
logger.info("Extracted questions: %s", extracted_questions)
|
||||
|
||||
chunks = rag.retrieve(keywords, n_results=settings.retrieval_n_results)
|
||||
chunks = rag.retrieve(extracted_questions, n_results=settings.retrieval_n_results)
|
||||
|
||||
if not chunks:
|
||||
return QueryResponse(keywords=keywords, answer=NO_RESULTS_ANSWER, sources=[])
|
||||
return QueryResponse(extracted_questions=extracted_questions, answer=NO_RESULTS_ANSWER, sources=[])
|
||||
|
||||
chunks_for_filter = [(text, meta) for text, meta, _dist in chunks]
|
||||
relevance_filter = RelevanceFilter(llm_client)
|
||||
|
|
@ -45,7 +45,7 @@ async def query(request: QueryRequest):
|
|||
)
|
||||
|
||||
if not filtered:
|
||||
return QueryResponse(keywords=keywords, answer=NO_RESULTS_ANSWER, sources=[])
|
||||
return QueryResponse(extracted_questions=extracted_questions, answer=NO_RESULTS_ANSWER, sources=[])
|
||||
|
||||
chunk_texts = [chunk for chunk, _meta in filtered]
|
||||
chunk_metadata = [meta for _chunk, meta in filtered]
|
||||
|
|
@ -65,7 +65,7 @@ async def query(request: QueryRequest):
|
|||
for meta in chunk_metadata
|
||||
]
|
||||
|
||||
return QueryResponse(keywords=keywords, answer=answer, sources=sources)
|
||||
return QueryResponse(extracted_questions=extracted_questions, answer=answer, sources=sources)
|
||||
|
||||
except HTTPException:
|
||||
raise
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
"""Query decomposer service.
|
||||
|
||||
This module provides a lightweight QueryDecomposer that delegates the
|
||||
translation of a natural language question into a list of keyword search
|
||||
terms to an LLM client.
|
||||
decomposition of a natural language question into simplified sub-questions
|
||||
to an LLM client.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
|
|
@ -26,11 +26,11 @@ def _extract_json_from_markdown(response: str) -> str:
|
|||
|
||||
|
||||
class QueryDecomposer:
|
||||
"""Decompose a natural language question into a list of keywords.
|
||||
"""Decompose a natural language question into simplified sub-questions.
|
||||
|
||||
The class expects an object that exposes an ``async complete(prompt: str) -> str``
|
||||
method (an LLM client). The ``decompose`` method builds a prompt, asks the
|
||||
LLM to return a JSON array of strings, and parses that JSON into a Python
|
||||
LLM to return a JSON array of sub-question strings, and parses that JSON into a Python
|
||||
list of strings. Edge cases are handled gracefully.
|
||||
"""
|
||||
|
||||
|
|
@ -38,20 +38,25 @@ class QueryDecomposer:
|
|||
self.llm_client = llm_client
|
||||
|
||||
async def decompose(self, question: str) -> List[str]:
|
||||
"""Return a list of keywords extracted for the given question.
|
||||
"""Return a list of sub-questions extracted for the given question.
|
||||
|
||||
Args:
|
||||
question: The natural language question to decompose.
|
||||
|
||||
Returns:
|
||||
A list of keyword strings. If the LLM response is invalid or the
|
||||
A list of sub-question strings. If the LLM response is invalid or the
|
||||
input is empty, an empty list is returned.
|
||||
"""
|
||||
|
||||
if question is None or question.strip() == "":
|
||||
return []
|
||||
|
||||
prompt = f"Given question: '{question}', extract key search keywords as JSON array"
|
||||
prompt = (
|
||||
f"Given this question: '{question}'\n\n"
|
||||
f"Break it down into 2-5 simplified sub-questions that would help "
|
||||
f"search for relevant information. Each sub-question should be short "
|
||||
f"and focused on one aspect. Return as a JSON array of strings."
|
||||
)
|
||||
|
||||
try:
|
||||
response = await self.llm_client.complete(prompt, step_name="QueryDecomposer")
|
||||
|
|
|
|||
Loading…
Reference in New Issue