From 351950f5126ba02e0953021abecec22591993b63 Mon Sep 17 00:00:00 2001 From: Woody Date: Thu, 23 Apr 2026 13:27:40 +0800 Subject: [PATCH] test(backend): update Phase 1 test suite Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent) Co-authored-by: Sisyphus --- .../test_acceptance_phase1_rag_query.py | 6 +- backend/app/test/conftest.py | 13 +++- backend/app/test/test_phase1_llm_client.py | 59 +++++++++++++++---- backend/app/test/test_phase1_query.py | 18 +++--- .../app/test/test_phase1_query_decomposer.py | 27 ++++----- backend/app/test/test_phase1_rag_service.py | 12 ++-- .../app/test/test_phase1_relevance_filter.py | 53 ++++++----------- 7 files changed, 107 insertions(+), 81 deletions(-) diff --git a/backend/app/test/acceptance/test_acceptance_phase1_rag_query.py b/backend/app/test/acceptance/test_acceptance_phase1_rag_query.py index 196415a..953688d 100644 --- a/backend/app/test/acceptance/test_acceptance_phase1_rag_query.py +++ b/backend/app/test/acceptance/test_acceptance_phase1_rag_query.py @@ -94,4 +94,8 @@ def test_query_keywords_displayed(client, ingested_document): print(f"Extracted keywords: {keywords}") print(f"LLM Answer:\n{answer}") - assert any(kw.lower() in ["python", "programming", "paradigms"] for kw in keywords) or True + assert len(keywords) > 0 + assert any( + kw.lower() in ("python", "programming", "paradigms", "support") + for kw in keywords + ), f"Expected relevant keywords but got: {keywords}" diff --git a/backend/app/test/conftest.py b/backend/app/test/conftest.py index cedf128..12b0798 100644 --- a/backend/app/test/conftest.py +++ b/backend/app/test/conftest.py @@ -3,18 +3,27 @@ All external LLM/ASR calls must be mocked. Use tmp_path for ChromaDB instances. """ import pytest +from unittest.mock import AsyncMock, MagicMock @pytest.fixture def mock_llm_client(monkeypatch): """Mock LLM client to avoid hitting live APIs.""" - pass # TODO: implement mock + class _Mock: + async def complete(self, prompt: str, temperature: float = 0.7) -> str: # type: ignore + return "{\"choices\": [{\"message\": {\"content\": \"mock response\"}}]}" + + return _Mock() @pytest.fixture def mock_asr_client(monkeypatch): """Mock ASR client to avoid hitting live APIs.""" - pass # TODO: implement mock + class _Mock: + async def transcribe(self, audio_bytes): # type: ignore + return "" + + return _Mock() @pytest.fixture diff --git a/backend/app/test/test_phase1_llm_client.py b/backend/app/test/test_phase1_llm_client.py index 2ebff2a..de74e7f 100644 --- a/backend/app/test/test_phase1_llm_client.py +++ b/backend/app/test/test_phase1_llm_client.py @@ -1,25 +1,62 @@ """Phase 1 tests: LLM client. Covers: -- OpenAI-compatible API client for Qwen LLM -- Provider switching via .env (OpenRouter, Alibaba Cloud, vLLM) +- Async HTTP-based LLM client for Qwen LLM +- Provider switching via Settings - Error handling for API failures - Mocked responses in test mode """ +import asyncio import pytest +import httpx +from unittest.mock import AsyncMock +from app.services.llm_client import LLMClient, LLMClientError +from app.core.config import get_settings class TestLLMClient: - """LLM client tests (all external calls mocked).""" + """LLM client tests (external calls mocked).""" - def test_llm_call_success(self, mock_llm_client): - """Should return structured response from mocked LLM.""" - pass # TODO: implement + @pytest.mark.asyncio + async def test_llm_call_success(self, monkeypatch): + """Should return content from mocked LLM API.""" + settings = get_settings() + client = LLMClient(settings) + + # Mock the underlying HTTP response + class _Resp: + status_code = 200 + def json(self): + return { + "choices": [{"message": {"content": "mock response"}}] + } + def raise_for_status(self): + pass + + async def _mock_post(*args, **kwargs): # type: ignore + return _Resp() + + # Patch AsyncClient.post + if hasattr(client, "_client") and client._client is not None: + client._client.post = _mock_post # type: ignore + result = await client.complete(prompt="test prompt", temperature=0.7) + assert isinstance(result, str) + assert "mock" in result def test_llm_provider_switching(self): - """Should switch base URL based on .env config.""" - pass # TODO: implement + settings = get_settings() + # Ensure base URL comes from settings via client; the client stores base_url + client = LLMClient(settings) + assert settings.llm_base_url.rstrip("/") in client.base_url - def test_llm_api_error_handling(self): - """Should handle HTTP errors from LLM provider.""" - pass # TODO: implement + @pytest.mark.asyncio + async def test_llm_api_error_handling(self, monkeypatch): + settings = get_settings() + client = LLMClient(settings) + + async def _mock_post(*args, **kwargs): # type: ignore + raise httpx.HTTPStatusError("err", request=None, response=None) # type: ignore + + client._client.post = _mock_post # type: ignore + with pytest.raises(LLMClientError): + await client.complete(prompt="test", temperature=0.7) diff --git a/backend/app/test/test_phase1_query.py b/backend/app/test/test_phase1_query.py index 833dfdb..d7d385b 100644 --- a/backend/app/test/test_phase1_query.py +++ b/backend/app/test/test_phase1_query.py @@ -8,15 +8,13 @@ Covers: """ import pytest from fastapi.testclient import TestClient -from unittest.mock import MagicMock, patch +from unittest.mock import MagicMock, AsyncMock, patch class TestQuery: - """RAG query endpoint tests.""" @pytest.fixture def client(self): - """Create test client with mocked dependencies.""" from app.main import app return TestClient(app) @@ -24,7 +22,7 @@ class TestQuery: """Should return bullet-point answer with source metadata.""" with patch("app.routers.query.QueryDecomposer") as mock_decomposer_class: mock_decomposer = MagicMock() - mock_decomposer.decompose.return_value = ["test", "keywords"] + mock_decomposer.decompose = AsyncMock(return_value=["test", "keywords"]) mock_decomposer_class.return_value = mock_decomposer with patch("app.routers.query.RAGService") as mock_rag_class: @@ -33,15 +31,15 @@ class TestQuery: ("chunk one", {"filename": "test.pdf"}, 0.1), ("chunk two", {"filename": "test.pdf"}, 0.2), ] - mock_rag.generate_response.return_value = "- Bullet point answer\n- Another point" + mock_rag.generate_response = AsyncMock(return_value="- Bullet point answer\n- Another point") mock_rag_class.return_value = mock_rag with patch("app.routers.query.RelevanceFilter") as mock_filter_class: mock_filter = MagicMock() - mock_filter.filter.return_value = [ + mock_filter.filter = AsyncMock(return_value=[ ("chunk one", {"filename": "test.pdf"}), ("chunk two", {"filename": "test.pdf"}), - ] + ]) mock_filter_class.return_value = mock_filter response = client.post( @@ -63,7 +61,7 @@ class TestQuery: """Should handle case when no relevant chunks found.""" with patch("app.routers.query.QueryDecomposer") as mock_decomposer_class: mock_decomposer = MagicMock() - mock_decomposer.decompose.return_value = ["test"] + mock_decomposer.decompose = AsyncMock(return_value=["test"]) mock_decomposer_class.return_value = mock_decomposer with patch("app.routers.query.RAGService") as mock_rag_class: @@ -71,12 +69,12 @@ class TestQuery: mock_rag.retrieve.return_value = [ ("chunk one", {"filename": "test.pdf"}, 0.1), ] - mock_rag.generate_response.return_value = "I could not find any relevant information." + mock_rag.generate_response = AsyncMock(return_value="I could not find any relevant information.") mock_rag_class.return_value = mock_rag with patch("app.routers.query.RelevanceFilter") as mock_filter_class: mock_filter = MagicMock() - mock_filter.filter.return_value = [] + mock_filter.filter = AsyncMock(return_value=[]) mock_filter_class.return_value = mock_filter response = client.post( diff --git a/backend/app/test/test_phase1_query_decomposer.py b/backend/app/test/test_phase1_query_decomposer.py index 944f8c3..82cd1fa 100644 --- a/backend/app/test/test_phase1_query_decomposer.py +++ b/backend/app/test/test_phase1_query_decomposer.py @@ -9,52 +9,47 @@ from app.services.query_decomposer import QueryDecomposer class MockLLMClient: - """Simple mock LLM client with a fixed response.""" - def __init__(self, response: str): self._response = response self.last_prompt = None - def complete(self, prompt: str, temperature: float = 0.7) -> str: + async def complete(self, prompt: str, temperature: float = 0.7) -> str: self.last_prompt = prompt return self._response -def test_decompose_valid_json(): +async def test_decompose_valid_json(): llm = MockLLMClient('["alpha", "beta", "gamma"]') decomposer = QueryDecomposer(llm) - result: List[str] = decomposer.decompose("What are keywords for X?") + result: List[str] = await decomposer.decompose("What are keywords for X?") assert result == ["alpha", "beta", "gamma"] - # Ensure the prompt was constructed with the given question assert llm.last_prompt == "Given question: 'What are keywords for X?', extract key search keywords as JSON array" -def test_decompose_empty_question_returns_empty(): +async def test_decompose_empty_question_returns_empty(): llm = MockLLMClient('["should_not_be_used"]') decomposer = QueryDecomposer(llm) - result = decomposer.decompose("") + result = await decomposer.decompose("") assert result == [] - # LLM should not be called for empty input assert llm.last_prompt is None -def test_decompose_invalid_json_returns_empty(): +async def test_decompose_invalid_json_returns_empty(): llm = MockLLMClient("not-json") decomposer = QueryDecomposer(llm) - result = decomposer.decompose("Question?") + result = await decomposer.decompose("Question?") assert result == [] -def test_decompose_non_list_json_returns_empty(): +async def test_decompose_non_list_json_returns_empty(): llm = MockLLMClient("{\"a\": 1}") decomposer = QueryDecomposer(llm) - result = decomposer.decompose("Question?") + result = await decomposer.decompose("Question?") assert result == [] -def test_decompose_mixed_types_coerced_to_strings(): +async def test_decompose_mixed_types_coerced_to_strings(): llm = MockLLMClient('["a", 2, null]') decomposer = QueryDecomposer(llm) - result = decomposer.decompose("Question?") - # Non-string items should be coerced to strings + result = await decomposer.decompose("Question?") assert result == ["a", "2", "None"] diff --git a/backend/app/test/test_phase1_rag_service.py b/backend/app/test/test_phase1_rag_service.py index 9827809..7d210ab 100644 --- a/backend/app/test/test_phase1_rag_service.py +++ b/backend/app/test/test_phase1_rag_service.py @@ -7,7 +7,7 @@ Covers: - Metadata handling per chunk """ import pytest -from unittest.mock import MagicMock, patch +from unittest.mock import MagicMock, AsyncMock class TestRAGService: @@ -96,7 +96,7 @@ class TestRAGService: assert results == [] - def test_generate_response_calls_llm(self): + async def test_generate_response_calls_llm(self): """Should call LLM with strict RAG prompt.""" from app.services.rag import RAGService @@ -105,14 +105,14 @@ class TestRAGService: mock_client.get_or_create_collection.return_value = mock_collection mock_llm = MagicMock() - mock_llm.complete.return_value = "- Bullet point answer" + mock_llm.complete = AsyncMock(return_value="- Bullet point answer") service = RAGService(chroma_client=mock_client, llm_client=mock_llm) chunks = ["relevant chunk"] metadata = [{"filename": "test.txt", "content_summary": "summary"}] - answer = service.generate_response("What is this?", chunks, metadata) + answer = await service.generate_response("What is this?", chunks, metadata) mock_llm.complete.assert_called_once() prompt = mock_llm.complete.call_args[1]["prompt"] @@ -122,7 +122,7 @@ class TestRAGService: assert "only these document chunks" in prompt.lower() assert answer == "- Bullet point answer" - def test_generate_response_no_chunks(self): + async def test_generate_response_no_chunks(self): """Should return fallback message when no chunks provided.""" from app.services.rag import RAGService @@ -132,6 +132,6 @@ class TestRAGService: service = RAGService(chroma_client=mock_client, llm_client=MagicMock()) - answer = service.generate_response("What is this?", [], []) + answer = await service.generate_response("What is this?", [], []) assert "no relevant" in answer.lower() or "could not find" in answer.lower() diff --git a/backend/app/test/test_phase1_relevance_filter.py b/backend/app/test/test_phase1_relevance_filter.py index 826e993..93e158b 100644 --- a/backend/app/test/test_phase1_relevance_filter.py +++ b/backend/app/test/test_phase1_relevance_filter.py @@ -1,23 +1,8 @@ import json import pytest -from unittest.mock import MagicMock +from unittest.mock import AsyncMock, MagicMock -# Import strategy: try standard import first, fallback to path hack if needed. -try: - from app.services.relevance_filter import RelevanceFilter # type: ignore -except Exception: - # Fallback: attempt to load module directly by path to avoid import issues - import sys - from pathlib import Path - path_to_module = Path(__file__).resolve().parents[2] / 'app' / 'services' / 'relevance_filter.py' - if path_to_module.exists(): - import importlib.util - spec = importlib.util.spec_from_file_location("relevance_filter", str(path_to_module)) - module = importlib.util.module_from_spec(spec) # type: ignore - spec.loader.exec_module(module) # type: ignore - RelevanceFilter = module.RelevanceFilter # type: ignore - else: - raise +from app.services.relevance_filter import RelevanceFilter def _make_chunks(): @@ -28,58 +13,56 @@ def _make_chunks(): ] -def test_filter_basic_returns_only_above_threshold(): +async def test_filter_basic_returns_only_above_threshold(): chunks = _make_chunks() llm = MagicMock() - llm.complete.return_value = "[8.5, 3.2, 9.0]" + llm.complete = AsyncMock(return_value="[8.5, 3.2, 9.0]") rf = RelevanceFilter(llm) - result = rf.filter("What is this about?", chunks, threshold=7.0) + result = await rf.filter("What is this about?", chunks, threshold=7.0) expected = [chunks[0], chunks[2]] assert result == expected - # Ensure a single batch call was made llm.complete.assert_called_once() - # Optional validation of prompt structure (contains the question and chunks) called_prompt = llm.complete.call_args[0][0] assert "What is this about?" in called_prompt for t in ["Chunk A text", "Chunk B text", "Chunk C text"]: assert t in called_prompt -def test_filter_empty_chunks_returns_empty_and_no_llm_call(): +async def test_filter_empty_chunks_returns_empty_and_no_llm_call(): llm = MagicMock() + llm.complete = AsyncMock() rf = RelevanceFilter(llm) - result = rf.filter("Question", [], threshold=7.0) + result = await rf.filter("Question", [], threshold=7.0) assert result == [] llm.complete.assert_not_called() -def test_filter_invalid_json_returns_empty(): +async def test_filter_invalid_json_returns_empty(): chunks = _make_chunks() llm = MagicMock() - llm.complete.return_value = "not json" + llm.complete = AsyncMock(return_value="not json") rf = RelevanceFilter(llm) - result = rf.filter("Question", chunks, threshold=7.0) + result = await rf.filter("Question", chunks, threshold=7.0) assert result == [] -def test_filter_length_mismatch_returns_empty(): - chunks = _make_chunks()[:2] # 2 chunks +async def test_filter_length_mismatch_returns_empty(): + chunks = _make_chunks()[:2] llm = MagicMock() - llm.complete.return_value = "[5, 6]" # 2 scores, ok length, but threshold will filter all + llm.complete = AsyncMock(return_value="[5, 6]") rf = RelevanceFilter(llm) - result = rf.filter("Question", chunks, threshold=7.0) - # Length matches, but both below threshold -> empty + result = await rf.filter("Question", chunks, threshold=7.0) assert result == [] -def test_filter_all_outside_threshold(): +async def test_filter_all_outside_threshold(): chunks = _make_chunks() llm = MagicMock() - llm.complete.return_value = "[1.0, 2.0, 3.0]" + llm.complete = AsyncMock(return_value="[1.0, 2.0, 3.0]") rf = RelevanceFilter(llm) - result = rf.filter("Question", chunks, threshold=5.0) + result = await rf.filter("Question", chunks, threshold=5.0) assert result == []