diff --git a/backend/app/services/query_decomposer.py b/backend/app/services/query_decomposer.py index ff3b566..2895011 100644 --- a/backend/app/services/query_decomposer.py +++ b/backend/app/services/query_decomposer.py @@ -2,19 +2,22 @@ This module provides a lightweight QueryDecomposer that delegates the translation of a natural language question into a list of keyword search -terms to an LLM client. The interface is intentionally minimal to support -test-driven development for Phase 1.3. +terms to an LLM client. """ from __future__ import annotations import json +import logging from typing import List +logger = logging.getLogger(__name__) + + class QueryDecomposer: """Decompose a natural language question into a list of keywords. - The class expects an object that exposes a ``complete(prompt: str) -> str`` + The class expects an object that exposes an ``async complete(prompt: str) -> str`` method (an LLM client). The ``decompose`` method builds a prompt, asks the LLM to return a JSON array of strings, and parses that JSON into a Python list of strings. Edge cases are handled gracefully. @@ -23,7 +26,7 @@ class QueryDecomposer: def __init__(self, llm_client) -> None: self.llm_client = llm_client - def decompose(self, question: str) -> List[str]: + async def decompose(self, question: str) -> List[str]: """Return a list of keywords extracted for the given question. Args: @@ -40,9 +43,9 @@ class QueryDecomposer: prompt = f"Given question: '{question}', extract key search keywords as JSON array" try: - response = self.llm_client.complete(prompt) - except Exception: - # If the LLM call fails for any reason, defensively return no keywords + response = await self.llm_client.complete(prompt) + except Exception as exc: + logger.warning("LLM decomposition failed: %s", exc) return [] if not isinstance(response, str): @@ -51,15 +54,13 @@ class QueryDecomposer: try: data = json.loads(response) except json.JSONDecodeError: - # Invalid JSON – treat as no keywords return [] if not isinstance(data, list): return [] - # If all items are strings, return as-is. Otherwise, coerce to strings. if len(data) == 0: return [] if all(isinstance(item, str) for item in data): - return data # type: ignore[return-value] + return data return [str(item) for item in data] diff --git a/backend/app/services/rag.py b/backend/app/services/rag.py index f580510..f8ed3f7 100644 --- a/backend/app/services/rag.py +++ b/backend/app/services/rag.py @@ -1,13 +1,15 @@ """RAG service for embedding, retrieval, and response generation.""" import uuid from typing import List, Tuple, Dict, Any, Optional - -import httpx +import logging from app.core.config import Settings from app.core.database import get_chroma_client +logger = logging.getLogger(__name__) + + class RAGService: """Service for document ingestion, retrieval, and response generation.""" @@ -25,10 +27,14 @@ class RAGService: @property def collection(self): - """Lazy-load the ChromaDB collection.""" if self._collection is None: - from app.core.database import get_or_create_collection - self._collection = get_or_create_collection(self.chroma_client, "documents") + from app.core.database import get_or_create_collection, get_embedding_function_settings + embedding_fn = None + if self.settings is not None: + embedding_fn = get_embedding_function_settings(self.settings) + self._collection = get_or_create_collection( + self.chroma_client, "documents", embedding_function=embedding_fn + ) return self._collection def ingest_document( @@ -37,16 +43,6 @@ class RAGService: chunks: List[str], metadata_list: List[Dict[str, Any]], ) -> str: - """Ingest document chunks into ChromaDB. - - Args: - file_path: Path to the source file. - chunks: List of text chunks. - metadata_list: List of metadata dicts matching chunk count. - - Returns: - Document ID (UUID) for the ingestion batch. - """ if not chunks: return "" @@ -66,15 +62,6 @@ class RAGService: query_keywords: List[str], n_results: int = 10, ) -> List[Tuple[str, Dict[str, Any], float]]: - """Retrieve relevant chunks from ChromaDB. - - Args: - query_keywords: List of keywords from query decomposition. - n_results: Maximum number of results to retrieve. - - Returns: - List of (chunk_text, metadata, distance) tuples. - """ query_text = " ".join(query_keywords) results = self.collection.query( @@ -91,22 +78,12 @@ class RAGService: return chunks - def generate_response( + async def generate_response( self, question: str, chunks: List[str], metadata_list: List[Dict[str, Any]], ) -> str: - """Generate a bullet-point response using only provided chunks. - - Args: - question: The user's question. - chunks: List of relevant document chunks. - metadata_list: List of metadata for each chunk. - - Returns: - Bullet-point formatted answer string. - """ if not chunks: return "I could not find any relevant information to answer your question." @@ -135,4 +112,4 @@ class RAGService: f"Answer:" ) - return self.llm_client.complete(prompt=prompt, temperature=0.3) + return await self.llm_client.complete(prompt=prompt, temperature=0.3) diff --git a/backend/app/services/relevance_filter.py b/backend/app/services/relevance_filter.py index 7ec00d1..4a82f04 100644 --- a/backend/app/services/relevance_filter.py +++ b/backend/app/services/relevance_filter.py @@ -1,66 +1,58 @@ from __future__ import annotations import json +import logging from typing import List, Tuple, Dict +logger = logging.getLogger(__name__) + + class RelevanceFilter: """RelevanceFilter batches chunk texts to an LLM and selects those with relevance scores above a threshold. - - The constructor expects an llm_client-like object with a `complete(prompt: str, temperature: float = 0.7) -> str` method. """ def __init__(self, llm_client): self.llm_client = llm_client def _build_prompt(self, question: str, chunks: List[Tuple[str, Dict]]) -> str: - """Build the single prompt that asks the LLM to score all chunks. - - The prompt format is designed to be simple and deterministic for tests: - - Include the question - - List the chunk texts in order - - Ask for a JSON array of scores corresponding to each chunk - """ texts = [chunk_text for (chunk_text, _meta) in chunks] - # Keep the prompt readable and deterministic + lines = [] + for idx, t in enumerate(texts, start=1): + lines.append(f"Chunk {idx}: {t}") + chunks_formatted = "\n".join(lines) prompt = ( f"Given question '{question}' and these document chunks, rate each 0-10 for relevance. " - f"Return JSON array of scores. Chunks: {texts}" + f"Return JSON array of scores.\n{chunks_formatted}\n" ) return prompt - def filter( + async def filter( self, question: str, chunks: List[Tuple[str, Dict]], threshold: float = 7.0 ) -> List[Tuple[str, Dict]]: - """Return only chunks whose relevance score exceeds the threshold. - - - Chunks are sent to the LLM in a single batch call. - - Expects the LLM to respond with a JSON array of numbers, in the same - order as the provided chunks. - - If input is empty, returns an empty list. - - If the LLM response cannot be parsed or the length mismatches, returns an empty list. - """ - if not chunks: return [] prompt = self._build_prompt(question, chunks) - response = self.llm_client.complete(prompt, temperature=0.0) + try: + response = await self.llm_client.complete(prompt, temperature=0.0) + except Exception as exc: + logger.error("RelevanceFilter LLM call failed: %s", exc) + return [] scores: List[float] = [] try: parsed = json.loads(response) if not isinstance(parsed, list): return [] - # Ensure all values are numeric for v in parsed: if isinstance(v, (int, float)): scores.append(float(v)) else: return [] - except Exception: - # Gracefully handle invalid JSON or unexpected formats + except Exception as exc: + logger.error("RelevanceFilter JSON parse failed: %s", exc) return [] if len(scores) != len(chunks):