feat(backend): rename keywords to extracted_questions in query pipeline (sub-phase 2.3)
Change QueryDecomposer prompt to generate 2-5 sub-questions instead of keywords. Rename API field from keywords to extracted_questions across models, service, and router.

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
parent
ecaa9ebb26
commit
f9dda7bd18
|
|
@@ -10,6 +10,6 @@ class QueryRequest(BaseModel):
|
||||||
|
|
||||||
|
|
||||||
class QueryResponse(BaseModel):
|
class QueryResponse(BaseModel):
|
||||||
keywords: List[str]
|
extracted_questions: List[str]
|
||||||
answer: str
|
answer: str
|
||||||
sources: List[SourceMetadata]
|
sources: List[SourceMetadata]
|
||||||
|
|
|
||||||
|
|
@@ -30,13 +30,13 @@ async def query(request: QueryRequest):
|
||||||
|
|
||||||
logger.info("Query: %s", request.question)
|
logger.info("Query: %s", request.question)
|
||||||
decomposer = QueryDecomposer(llm_client)
|
decomposer = QueryDecomposer(llm_client)
|
||||||
keywords = await decomposer.decompose(request.question)
|
extracted_questions = await decomposer.decompose(request.question)
|
||||||
logger.info("Keywords: %s", keywords)
|
logger.info("Extracted questions: %s", extracted_questions)
|
||||||
|
|
||||||
chunks = rag.retrieve(keywords, n_results=settings.retrieval_n_results)
|
chunks = rag.retrieve(extracted_questions, n_results=settings.retrieval_n_results)
|
||||||
|
|
||||||
if not chunks:
|
if not chunks:
|
||||||
return QueryResponse(keywords=keywords, answer=NO_RESULTS_ANSWER, sources=[])
|
return QueryResponse(extracted_questions=extracted_questions, answer=NO_RESULTS_ANSWER, sources=[])
|
||||||
|
|
||||||
chunks_for_filter = [(text, meta) for text, meta, _dist in chunks]
|
chunks_for_filter = [(text, meta) for text, meta, _dist in chunks]
|
||||||
relevance_filter = RelevanceFilter(llm_client)
|
relevance_filter = RelevanceFilter(llm_client)
|
||||||
|
|
@@ -45,7 +45,7 @@ async def query(request: QueryRequest):
|
||||||
)
|
)
|
||||||
|
|
||||||
if not filtered:
|
if not filtered:
|
||||||
return QueryResponse(keywords=keywords, answer=NO_RESULTS_ANSWER, sources=[])
|
return QueryResponse(extracted_questions=extracted_questions, answer=NO_RESULTS_ANSWER, sources=[])
|
||||||
|
|
||||||
chunk_texts = [chunk for chunk, _meta in filtered]
|
chunk_texts = [chunk for chunk, _meta in filtered]
|
||||||
chunk_metadata = [meta for _chunk, meta in filtered]
|
chunk_metadata = [meta for _chunk, meta in filtered]
|
||||||
|
|
@@ -65,7 +65,7 @@ async def query(request: QueryRequest):
|
||||||
for meta in chunk_metadata
|
for meta in chunk_metadata
|
||||||
]
|
]
|
||||||
|
|
||||||
return QueryResponse(keywords=keywords, answer=answer, sources=sources)
|
return QueryResponse(extracted_questions=extracted_questions, answer=answer, sources=sources)
|
||||||
|
|
||||||
except HTTPException:
|
except HTTPException:
|
||||||
raise
|
raise
|
||||||
|
|
|
||||||
|
|
@@ -1,8 +1,8 @@
|
||||||
"""Query decomposer service.
|
"""Query decomposer service.
|
||||||
|
|
||||||
This module provides a lightweight QueryDecomposer that delegates the
|
This module provides a lightweight QueryDecomposer that delegates the
|
||||||
translation of a natural language question into a list of keyword search
|
decomposition of a natural language question into simplified sub-questions
|
||||||
terms to an LLM client.
|
to an LLM client.
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
|
@@ -26,11 +26,11 @@ def _extract_json_from_markdown(response: str) -> str:
|
||||||
|
|
||||||
|
|
||||||
class QueryDecomposer:
|
class QueryDecomposer:
|
||||||
"""Decompose a natural language question into a list of keywords.
|
"""Decompose a natural language question into simplified sub-questions.
|
||||||
|
|
||||||
The class expects an object that exposes an ``async complete(prompt: str) -> str``
|
The class expects an object that exposes an ``async complete(prompt: str) -> str``
|
||||||
method (an LLM client). The ``decompose`` method builds a prompt, asks the
|
method (an LLM client). The ``decompose`` method builds a prompt, asks the
|
||||||
LLM to return a JSON array of strings, and parses that JSON into a Python
|
LLM to return a JSON array of sub-question strings, and parses that JSON into a Python
|
||||||
list of strings. Edge cases are handled gracefully.
|
list of strings. Edge cases are handled gracefully.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
|
@@ -38,20 +38,25 @@ class QueryDecomposer:
|
||||||
self.llm_client = llm_client
|
self.llm_client = llm_client
|
||||||
|
|
||||||
async def decompose(self, question: str) -> List[str]:
|
async def decompose(self, question: str) -> List[str]:
|
||||||
"""Return a list of keywords extracted for the given question.
|
"""Return a list of sub-questions extracted for the given question.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
question: The natural language question to decompose.
|
question: The natural language question to decompose.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
A list of keyword strings. If the LLM response is invalid or the
|
A list of sub-question strings. If the LLM response is invalid or the
|
||||||
input is empty, an empty list is returned.
|
input is empty, an empty list is returned.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if question is None or question.strip() == "":
|
if question is None or question.strip() == "":
|
||||||
return []
|
return []
|
||||||
|
|
||||||
prompt = f"Given question: '{question}', extract key search keywords as JSON array"
|
prompt = (
|
||||||
|
f"Given this question: '{question}'\n\n"
|
||||||
|
f"Break it down into 2-5 simplified sub-questions that would help "
|
||||||
|
f"search for relevant information. Each sub-question should be short "
|
||||||
|
f"and focused on one aspect. Return as a JSON array of strings."
|
||||||
|
)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
response = await self.llm_client.complete(prompt, step_name="QueryDecomposer")
|
response = await self.llm_client.complete(prompt, step_name="QueryDecomposer")
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue