legco_ai_assistant/backend/app/test/test_phase1_rag_service.py

138 lines
5.0 KiB
Python

"""Phase 1 tests: RAG service logic.
Covers:
- ChromaDB document ingestion with metadata
- Retrieval with query keywords
- Response generation with strict RAG prompt
- Metadata handling per chunk
"""
import pytest
from unittest.mock import MagicMock, patch
class TestRAGService:
"""RAG retrieval and prompt logic tests."""
def test_ingest_document_adds_chunks(self):
"""Should add chunks with metadata to ChromaDB collection."""
from app.services.rag import RAGService
mock_collection = MagicMock()
mock_client = MagicMock()
mock_client.get_or_create_collection.return_value = mock_collection
service = RAGService(chroma_client=mock_client)
chunks = ["chunk one", "chunk two"]
metadata = [
{"filename": "test.txt", "upload_date": "2024-01-01", "content_summary": "summary 1", "chunk_index": 0},
{"filename": "test.txt", "upload_date": "2024-01-01", "content_summary": "summary 2", "chunk_index": 1},
]
service.ingest_document("test.txt", chunks, metadata)
mock_client.get_or_create_collection.assert_called_once_with(name="documents")
mock_collection.add.assert_called_once()
call_args = mock_collection.add.call_args[1]
assert len(call_args["documents"]) == 2
assert call_args["documents"] == chunks
assert len(call_args["metadatas"]) == 2
assert call_args["metadatas"] == metadata
assert len(call_args["ids"]) == 2
def test_ingest_document_empty_chunks(self):
"""Should not call ChromaDB when chunks list is empty."""
from app.services.rag import RAGService
mock_collection = MagicMock()
mock_client = MagicMock()
mock_client.get_or_create_collection.return_value = mock_collection
service = RAGService(chroma_client=mock_client)
service.ingest_document("test.txt", [], [])
mock_collection.add.assert_not_called()
def test_retrieve_returns_chunks(self):
"""Should retrieve chunks and metadata from ChromaDB."""
from app.services.rag import RAGService
mock_collection = MagicMock()
mock_client = MagicMock()
mock_client.get_or_create_collection.return_value = mock_collection
mock_collection.query.return_value = {
"documents": [["chunk one", "chunk two"]],
"metadatas": [[{"filename": "test.txt"}, {"filename": "test.txt"}]],
"distances": [[0.1, 0.2]],
}
service = RAGService(chroma_client=mock_client)
results = service.retrieve(["query", "keywords"], n_results=5)
mock_collection.query.assert_called_once()
call_args = mock_collection.query.call_args[1]
assert call_args["n_results"] == 5
assert len(results) == 2
assert results[0] == ("chunk one", {"filename": "test.txt"}, 0.1)
assert results[1] == ("chunk two", {"filename": "test.txt"}, 0.2)
def test_retrieve_no_results(self):
"""Should return empty list when no results found."""
from app.services.rag import RAGService
mock_collection = MagicMock()
mock_client = MagicMock()
mock_client.get_or_create_collection.return_value = mock_collection
mock_collection.query.return_value = {
"documents": [[]],
"metadatas": [[]],
"distances": [[]],
}
service = RAGService(chroma_client=mock_client)
results = service.retrieve(["query"])
assert results == []
def test_generate_response_calls_llm(self):
"""Should call LLM with strict RAG prompt."""
from app.services.rag import RAGService
mock_collection = MagicMock()
mock_client = MagicMock()
mock_client.get_or_create_collection.return_value = mock_collection
mock_llm = MagicMock()
mock_llm.complete.return_value = "- Bullet point answer"
service = RAGService(chroma_client=mock_client, llm_client=mock_llm)
chunks = ["relevant chunk"]
metadata = [{"filename": "test.txt", "content_summary": "summary"}]
answer = service.generate_response("What is this?", chunks, metadata)
mock_llm.complete.assert_called_once()
prompt = mock_llm.complete.call_args[1]["prompt"]
assert "What is this?" in prompt
assert "relevant chunk" in prompt
assert "test.txt" in prompt
assert "only these document chunks" in prompt.lower()
assert answer == "- Bullet point answer"
def test_generate_response_no_chunks(self):
"""Should return fallback message when no chunks provided."""
from app.services.rag import RAGService
mock_collection = MagicMock()
mock_client = MagicMock()
mock_client.get_or_create_collection.return_value = mock_collection
service = RAGService(chroma_client=mock_client, llm_client=MagicMock())
answer = service.generate_response("What is this?", [], [])
assert "no relevant" in answer.lower() or "could not find" in answer.lower()