refactor(backend): update ingest and query routers
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent) Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
parent
09f8cb7e6d
commit
4a22b906e4
|
|
@ -17,6 +17,7 @@ SUPPORTED_EXTENSIONS = {".pdf", ".docx", ".txt"}
|
||||||
@router.post("/ingest", response_model=IngestResponse)
|
@router.post("/ingest", response_model=IngestResponse)
|
||||||
async def ingest_document(file: UploadFile = File(...)):
|
async def ingest_document(file: UploadFile = File(...)):
|
||||||
"""Ingest a document into the RAG system."""
|
"""Ingest a document into the RAG system."""
|
||||||
|
from app.core.config import get_settings
|
||||||
from app.services.rag import RAGService
|
from app.services.rag import RAGService
|
||||||
from app.utils.chunking import TokenChunkingStrategy
|
from app.utils.chunking import TokenChunkingStrategy
|
||||||
from app.utils.metadata import extract_metadata
|
from app.utils.metadata import extract_metadata
|
||||||
|
|
@ -30,6 +31,7 @@ async def ingest_document(file: UploadFile = File(...)):
|
||||||
detail=f"Unsupported file format: {file_ext}. Supported: {', '.join(sorted(SUPPORTED_EXTENSIONS))}",
|
detail=f"Unsupported file format: {file_ext}. Supported: {', '.join(sorted(SUPPORTED_EXTENSIONS))}",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
settings = get_settings()
|
||||||
temp_path = None
|
temp_path = None
|
||||||
try:
|
try:
|
||||||
with tempfile.NamedTemporaryFile(delete=False, suffix=file_ext) as tmp:
|
with tempfile.NamedTemporaryFile(delete=False, suffix=file_ext) as tmp:
|
||||||
|
|
@ -51,7 +53,7 @@ async def ingest_document(file: UploadFile = File(...)):
|
||||||
else:
|
else:
|
||||||
text = ""
|
text = ""
|
||||||
|
|
||||||
chunker = TokenChunkingStrategy(chunk_size=1000, overlap=200)
|
chunker = TokenChunkingStrategy(chunk_size=settings.chunk_size, overlap=settings.chunk_overlap)
|
||||||
chunks = chunker.chunk(text)
|
chunks = chunker.chunk(text)
|
||||||
|
|
||||||
if not chunks:
|
if not chunks:
|
||||||
|
|
@ -59,7 +61,7 @@ async def ingest_document(file: UploadFile = File(...)):
|
||||||
|
|
||||||
metadata = extract_metadata(temp_path, chunks)
|
metadata = extract_metadata(temp_path, chunks)
|
||||||
|
|
||||||
rag = RAGService()
|
rag = RAGService(settings=settings)
|
||||||
document_id = rag.ingest_document(temp_path, chunks, metadata)
|
document_id = rag.ingest_document(temp_path, chunks, metadata)
|
||||||
|
|
||||||
logger.info("Ingested %s: %d chunks, doc_id=%s", filename, len(chunks), document_id)
|
logger.info("Ingested %s: %d chunks, doc_id=%s", filename, len(chunks), document_id)
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,8 @@ import logging
|
||||||
from fastapi import APIRouter, HTTPException
|
from fastapi import APIRouter, HTTPException
|
||||||
|
|
||||||
from app.core.config import get_settings
|
from app.core.config import get_settings
|
||||||
from app.models.ingest import QueryRequest, QueryResponse, SourceMetadata
|
from app.models.query import QueryRequest, QueryResponse
|
||||||
|
from app.models.common import SourceMetadata
|
||||||
from app.services.llm_client import LLMClient
|
from app.services.llm_client import LLMClient
|
||||||
from app.services.query_decomposer import QueryDecomposer
|
from app.services.query_decomposer import QueryDecomposer
|
||||||
from app.services.relevance_filter import RelevanceFilter
|
from app.services.relevance_filter import RelevanceFilter
|
||||||
|
|
@ -18,14 +19,6 @@ NO_RESULTS_ANSWER = "I could not find any relevant information to answer your qu
|
||||||
|
|
||||||
@router.post("/query", response_model=QueryResponse)
|
@router.post("/query", response_model=QueryResponse)
|
||||||
async def query(request: QueryRequest):
|
async def query(request: QueryRequest):
|
||||||
"""Execute the 3-step RAG query pipeline.
|
|
||||||
|
|
||||||
Pipeline:
|
|
||||||
1. QueryDecomposer: Extract keywords from question
|
|
||||||
2. RAGService.retrieve: Get relevant chunks from ChromaDB
|
|
||||||
3. RelevanceFilter: Score and filter chunks by relevance
|
|
||||||
4. RAGService.generate_response: Generate bullet-point answer
|
|
||||||
"""
|
|
||||||
if not request.question or not request.question.strip():
|
if not request.question or not request.question.strip():
|
||||||
raise HTTPException(status_code=400, detail="Question is required")
|
raise HTTPException(status_code=400, detail="Question is required")
|
||||||
|
|
||||||
|
|
@ -33,21 +26,23 @@ async def query(request: QueryRequest):
|
||||||
|
|
||||||
try:
|
try:
|
||||||
llm_client = LLMClient(settings)
|
llm_client = LLMClient(settings)
|
||||||
|
rag = RAGService(llm_client=llm_client, settings=settings)
|
||||||
|
|
||||||
logger.info("Query: %s", request.question)
|
logger.info("Query: %s", request.question)
|
||||||
decomposer = QueryDecomposer(llm_client)
|
decomposer = QueryDecomposer(llm_client)
|
||||||
keywords = decomposer.decompose(request.question)
|
keywords = await decomposer.decompose(request.question)
|
||||||
logger.info("Keywords: %s", keywords)
|
logger.info("Keywords: %s", keywords)
|
||||||
|
|
||||||
rag = RAGService(llm_client=llm_client)
|
chunks = rag.retrieve(keywords, n_results=settings.retrieval_n_results)
|
||||||
chunks = rag.retrieve(keywords, n_results=10)
|
|
||||||
|
|
||||||
if not chunks:
|
if not chunks:
|
||||||
return QueryResponse(keywords=keywords, answer=NO_RESULTS_ANSWER, sources=[])
|
return QueryResponse(keywords=keywords, answer=NO_RESULTS_ANSWER, sources=[])
|
||||||
|
|
||||||
chunks_for_filter = [(text, meta) for text, meta, _dist in chunks]
|
chunks_for_filter = [(text, meta) for text, meta, _dist in chunks]
|
||||||
relevance_filter = RelevanceFilter(llm_client)
|
relevance_filter = RelevanceFilter(llm_client)
|
||||||
filtered = relevance_filter.filter(request.question, chunks_for_filter, threshold=7.0)
|
filtered = await relevance_filter.filter(
|
||||||
|
request.question, chunks_for_filter, threshold=settings.relevance_threshold
|
||||||
|
)
|
||||||
|
|
||||||
if not filtered:
|
if not filtered:
|
||||||
return QueryResponse(keywords=keywords, answer=NO_RESULTS_ANSWER, sources=[])
|
return QueryResponse(keywords=keywords, answer=NO_RESULTS_ANSWER, sources=[])
|
||||||
|
|
@ -55,7 +50,7 @@ async def query(request: QueryRequest):
|
||||||
chunk_texts = [chunk for chunk, _meta in filtered]
|
chunk_texts = [chunk for chunk, _meta in filtered]
|
||||||
chunk_metadata = [meta for _chunk, meta in filtered]
|
chunk_metadata = [meta for _chunk, meta in filtered]
|
||||||
|
|
||||||
answer = rag.generate_response(request.question, chunk_texts, chunk_metadata)
|
answer = await rag.generate_response(request.question, chunk_texts, chunk_metadata)
|
||||||
logger.info("Answer generated: %d chars, %d sources", len(answer), len(filtered))
|
logger.info("Answer generated: %d chars, %d sources", len(answer), len(filtered))
|
||||||
|
|
||||||
sources = [
|
sources = [
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue