"""RAG service for embedding, retrieval, and response generation.""" import uuid from typing import List, Tuple, Dict, Any, Optional import httpx from app.core.config import Settings from app.core.database import get_chroma_client class RAGService: """Service for document ingestion, retrieval, and response generation.""" def __init__( self, chroma_client=None, llm_client=None, settings: Optional[Settings] = None, ): self.chroma_client = chroma_client or get_chroma_client() self.llm_client = llm_client self.settings = settings self._collection = None @property def collection(self): """Lazy-load the ChromaDB collection.""" if self._collection is None: from app.core.database import get_or_create_collection self._collection = get_or_create_collection(self.chroma_client, "documents") return self._collection def ingest_document( self, file_path: str, chunks: List[str], metadata_list: List[Dict[str, Any]], ) -> str: """Ingest document chunks into ChromaDB. Args: file_path: Path to the source file. chunks: List of text chunks. metadata_list: List of metadata dicts matching chunk count. Returns: Document ID (UUID) for the ingestion batch. """ if not chunks: return "" document_id = str(uuid.uuid4()) ids = [f"{document_id}_{i}" for i in range(len(chunks))] self.collection.add( documents=chunks, metadatas=metadata_list, ids=ids, ) return document_id def retrieve( self, query_keywords: List[str], n_results: int = 10, ) -> List[Tuple[str, Dict[str, Any], float]]: """Retrieve relevant chunks from ChromaDB. Args: query_keywords: List of keywords from query decomposition. n_results: Maximum number of results to retrieve. Returns: List of (chunk_text, metadata, distance) tuples. """ query_text = " ".join(query_keywords) results = self.collection.query( query_texts=[query_text], n_results=n_results, ) chunks = [] if results["documents"] and results["documents"][0]: for i, doc in enumerate(results["documents"][0]): metadata = results["metadatas"][0][i] if results["metadatas"][0] else {} distance = results["distances"][0][i] if results["distances"][0] else 0.0 chunks.append((doc, metadata, distance)) return chunks def generate_response( self, question: str, chunks: List[str], metadata_list: List[Dict[str, Any]], ) -> str: """Generate a bullet-point response using only provided chunks. Args: question: The user's question. chunks: List of relevant document chunks. metadata_list: List of metadata for each chunk. Returns: Bullet-point formatted answer string. """ if not chunks: return "I could not find any relevant information to answer your question." if self.llm_client is None: return "LLM client not configured." context_parts = [] for i, (chunk, meta) in enumerate(zip(chunks, metadata_list)): source = meta.get("filename", "unknown") summary = meta.get("content_summary", "") context_parts.append( f"[{i + 1}] Source: {source}\n" f"Summary: {summary}\n" f"Content: {chunk}\n" ) context = "\n".join(context_parts) prompt = ( f"Question: {question}\n\n" f"Answer the question using ONLY these document chunks. " f"Do not use any external knowledge. " f"Format your answer as bullet points. " f"Cite the source number [N] for each point.\n\n" f"Document chunks:\n{context}\n\n" f"Answer:" ) return self.llm_client.complete(prompt=prompt, temperature=0.3)