diff --git a/backend/app/routers/ingest.py b/backend/app/routers/ingest.py index abf2d7b..63dee14 100644 --- a/backend/app/routers/ingest.py +++ b/backend/app/routers/ingest.py @@ -17,6 +17,7 @@ SUPPORTED_EXTENSIONS = {".pdf", ".docx", ".txt"} @router.post("/ingest", response_model=IngestResponse) async def ingest_document(file: UploadFile = File(...)): """Ingest a document into the RAG system.""" + from app.core.config import get_settings from app.services.rag import RAGService from app.utils.chunking import TokenChunkingStrategy from app.utils.metadata import extract_metadata @@ -30,6 +31,7 @@ async def ingest_document(file: UploadFile = File(...)): detail=f"Unsupported file format: {file_ext}. Supported: {', '.join(sorted(SUPPORTED_EXTENSIONS))}", ) + settings = get_settings() temp_path = None try: with tempfile.NamedTemporaryFile(delete=False, suffix=file_ext) as tmp: @@ -51,7 +53,7 @@ async def ingest_document(file: UploadFile = File(...)): else: text = "" - chunker = TokenChunkingStrategy(chunk_size=1000, overlap=200) + chunker = TokenChunkingStrategy(chunk_size=settings.chunk_size, overlap=settings.chunk_overlap) chunks = chunker.chunk(text) if not chunks: @@ -59,7 +61,7 @@ async def ingest_document(file: UploadFile = File(...)): metadata = extract_metadata(temp_path, chunks) - rag = RAGService() + rag = RAGService(settings=settings) document_id = rag.ingest_document(temp_path, chunks, metadata) logger.info("Ingested %s: %d chunks, doc_id=%s", filename, len(chunks), document_id) diff --git a/backend/app/routers/query.py b/backend/app/routers/query.py index 6b114e1..ceecbe6 100644 --- a/backend/app/routers/query.py +++ b/backend/app/routers/query.py @@ -4,7 +4,8 @@ import logging from fastapi import APIRouter, HTTPException from app.core.config import get_settings -from app.models.ingest import QueryRequest, QueryResponse, SourceMetadata +from app.models.query import QueryRequest, QueryResponse +from app.models.common import SourceMetadata from app.services.llm_client import LLMClient from app.services.query_decomposer import QueryDecomposer from app.services.relevance_filter import RelevanceFilter @@ -18,14 +19,6 @@ NO_RESULTS_ANSWER = "I could not find any relevant information to answer your qu @router.post("/query", response_model=QueryResponse) async def query(request: QueryRequest): - """Execute the 3-step RAG query pipeline. - - Pipeline: - 1. QueryDecomposer: Extract keywords from question - 2. RAGService.retrieve: Get relevant chunks from ChromaDB - 3. RelevanceFilter: Score and filter chunks by relevance - 4. RAGService.generate_response: Generate bullet-point answer - """ if not request.question or not request.question.strip(): raise HTTPException(status_code=400, detail="Question is required") @@ -33,21 +26,23 @@ async def query(request: QueryRequest): try: llm_client = LLMClient(settings) + rag = RAGService(llm_client=llm_client, settings=settings) logger.info("Query: %s", request.question) decomposer = QueryDecomposer(llm_client) - keywords = decomposer.decompose(request.question) + keywords = await decomposer.decompose(request.question) logger.info("Keywords: %s", keywords) - rag = RAGService(llm_client=llm_client) - chunks = rag.retrieve(keywords, n_results=10) + chunks = rag.retrieve(keywords, n_results=settings.retrieval_n_results) if not chunks: return QueryResponse(keywords=keywords, answer=NO_RESULTS_ANSWER, sources=[]) chunks_for_filter = [(text, meta) for text, meta, _dist in chunks] relevance_filter = RelevanceFilter(llm_client) - filtered = relevance_filter.filter(request.question, chunks_for_filter, threshold=7.0) + filtered = await relevance_filter.filter( + request.question, chunks_for_filter, threshold=settings.relevance_threshold + ) if not filtered: return QueryResponse(keywords=keywords, answer=NO_RESULTS_ANSWER, sources=[]) @@ -55,7 +50,7 @@ async def query(request: QueryRequest): chunk_texts = [chunk for chunk, _meta in filtered] chunk_metadata = [meta for _chunk, meta in filtered] - answer = rag.generate_response(request.question, chunk_texts, chunk_metadata) + answer = await rag.generate_response(request.question, chunk_texts, chunk_metadata) logger.info("Answer generated: %d chars, %d sources", len(answer), len(filtered)) sources = [