From 351950f5126ba02e0953021abecec22591993b63 Mon Sep 17 00:00:00 2001
From: Woody <woody.ck.tse@gmail.com>
Date: Thu, 23 Apr 2026 13:27:40 +0800
Subject: [PATCH] test(backend): update Phase 1 test suite

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
---
 .../test_acceptance_phase1_rag_query.py       |  6 +-
 backend/app/test/conftest.py                  | 13 +++-
 backend/app/test/test_phase1_llm_client.py    | 59 +++++++++++++++----
 backend/app/test/test_phase1_query.py         | 18 +++---
 .../app/test/test_phase1_query_decomposer.py  | 27 ++++-----
 backend/app/test/test_phase1_rag_service.py   | 12 ++--
 .../app/test/test_phase1_relevance_filter.py  | 53 ++++++-----------
 7 files changed, 107 insertions(+), 81 deletions(-)

diff --git a/backend/app/test/acceptance/test_acceptance_phase1_rag_query.py b/backend/app/test/acceptance/test_acceptance_phase1_rag_query.py
index 196415a..953688d 100644
--- a/backend/app/test/acceptance/test_acceptance_phase1_rag_query.py
+++ b/backend/app/test/acceptance/test_acceptance_phase1_rag_query.py
@@ -94,4 +94,8 @@ def test_query_keywords_displayed(client, ingested_document):
     print(f"Extracted keywords: {keywords}")
     print(f"LLM Answer:\n{answer}")
 
-    assert any(kw.lower() in ["python", "programming", "paradigms"] for kw in keywords) or True
+    assert len(keywords) > 0
+    assert any(
+        kw.lower() in ("python", "programming", "paradigms", "support")
+        for kw in keywords
+    ), f"Expected relevant keywords but got: {keywords}"
diff --git a/backend/app/test/conftest.py b/backend/app/test/conftest.py
index cedf128..12b0798 100644
--- a/backend/app/test/conftest.py
+++ b/backend/app/test/conftest.py
@@ -3,18 +3,27 @@
 All external LLM/ASR calls must be mocked. Use tmp_path for ChromaDB instances.
 """
 import pytest
+from unittest.mock import AsyncMock, MagicMock
 
 
 @pytest.fixture
 def mock_llm_client(monkeypatch):
     """Mock LLM client to avoid hitting live APIs."""
-    pass  # TODO: implement mock
+    class _Mock:
+        async def complete(self, prompt: str, temperature: float = 0.7) -> str:  # type: ignore
+            return "{\"choices\": [{\"message\": {\"content\": \"mock response\"}}]}"
+
+    return _Mock()
 
 
 @pytest.fixture
 def mock_asr_client(monkeypatch):
     """Mock ASR client to avoid hitting live APIs."""
-    pass  # TODO: implement mock
+    class _Mock:
+        async def transcribe(self, audio_bytes):  # type: ignore
+            return ""
+
+    return _Mock()
 
 
 @pytest.fixture
diff --git a/backend/app/test/test_phase1_llm_client.py b/backend/app/test/test_phase1_llm_client.py
index 2ebff2a..de74e7f 100644
--- a/backend/app/test/test_phase1_llm_client.py
+++ b/backend/app/test/test_phase1_llm_client.py
@@ -1,25 +1,62 @@
 """Phase 1 tests: LLM client.
 
 Covers:
-- OpenAI-compatible API client for Qwen LLM
-- Provider switching via .env (OpenRouter, Alibaba Cloud, vLLM)
+- Async HTTP-based LLM client for Qwen LLM
+- Provider switching via Settings
 - Error handling for API failures
 - Mocked responses in test mode
 """
+import asyncio
 import pytest
+import httpx
+from unittest.mock import AsyncMock
+from app.services.llm_client import LLMClient, LLMClientError
+from app.core.config import get_settings
 
 
 class TestLLMClient:
-    """LLM client tests (all external calls mocked)."""
+    """LLM client tests (external calls mocked)."""
 
-    def test_llm_call_success(self, mock_llm_client):
-        """Should return structured response from mocked LLM."""
-        pass  # TODO: implement
+    @pytest.mark.asyncio
+    async def test_llm_call_success(self, monkeypatch):
+        """Should return the message content from a mocked LLM API reply."""
+        client = LLMClient(get_settings())
+
+        class _Resp:
+            """Minimal stand-in for the HTTP response the client reads."""
+
+            status_code = 200
+
+            def json(self):
+                return {"choices": [{"message": {"content": "mock response"}}]}
+
+            def raise_for_status(self):
+                pass
+
+        async def _mock_post(*args, **kwargs):
+            return _Resp()
+
+        # Patch unconditionally: a guarded patch would silently fall through
+        # to the live API if the client attribute were ever missing or None.
+        monkeypatch.setattr(client._client, "post", _mock_post)
+        result = await client.complete(prompt="test prompt", temperature=0.7)
+        assert isinstance(result, str)
+        assert "mock" in result
 
     def test_llm_provider_switching(self):
-        """Should switch base URL based on .env config."""
-        pass  # TODO: implement
+        """Client base_url should contain the settings base URL (trailing "/" ignored)."""
+        settings = get_settings()
+        client = LLMClient(settings)
+        assert settings.llm_base_url.rstrip("/") in client.base_url
 
-    def test_llm_api_error_handling(self):
-        """Should handle HTTP errors from LLM provider."""
-        pass  # TODO: implement
+    @pytest.mark.asyncio
+    async def test_llm_api_error_handling(self, monkeypatch):
+        """Should surface provider HTTP errors as LLMClientError."""
+        client = LLMClient(get_settings())
+        request = httpx.Request("POST", "http://llm.invalid")
+        response = httpx.Response(500, request=request)
+        error = httpx.HTTPStatusError("err", request=request, response=response)
+
+        monkeypatch.setattr(client._client, "post", AsyncMock(side_effect=error))
+        with pytest.raises(LLMClientError):
+            await client.complete(prompt="test", temperature=0.7)
diff --git a/backend/app/test/test_phase1_query.py b/backend/app/test/test_phase1_query.py
index 833dfdb..d7d385b 100644
--- a/backend/app/test/test_phase1_query.py
+++ b/backend/app/test/test_phase1_query.py
@@ -8,15 +8,13 @@ Covers:
 """
 import pytest
 from fastapi.testclient import TestClient
-from unittest.mock import MagicMock, patch
+from unittest.mock import MagicMock, AsyncMock, patch
 
 
 class TestQuery:
-    """RAG query endpoint tests."""
 
     @pytest.fixture
     def client(self):
-        """Create test client with mocked dependencies."""
         from app.main import app
         return TestClient(app)
 
@@ -24,7 +22,7 @@ class TestQuery:
         """Should return bullet-point answer with source metadata."""
         with patch("app.routers.query.QueryDecomposer") as mock_decomposer_class:
             mock_decomposer = MagicMock()
-            mock_decomposer.decompose.return_value = ["test", "keywords"]
+            mock_decomposer.decompose = AsyncMock(return_value=["test", "keywords"])
             mock_decomposer_class.return_value = mock_decomposer
 
             with patch("app.routers.query.RAGService") as mock_rag_class:
@@ -33,15 +31,15 @@ class TestQuery:
                     ("chunk one", {"filename": "test.pdf"}, 0.1),
                     ("chunk two", {"filename": "test.pdf"}, 0.2),
                 ]
-                mock_rag.generate_response.return_value = "- Bullet point answer\n- Another point"
+                mock_rag.generate_response = AsyncMock(return_value="- Bullet point answer\n- Another point")
                 mock_rag_class.return_value = mock_rag
 
                 with patch("app.routers.query.RelevanceFilter") as mock_filter_class:
                     mock_filter = MagicMock()
-                    mock_filter.filter.return_value = [
+                    mock_filter.filter = AsyncMock(return_value=[
                         ("chunk one", {"filename": "test.pdf"}),
                         ("chunk two", {"filename": "test.pdf"}),
-                    ]
+                    ])
                     mock_filter_class.return_value = mock_filter
 
                     response = client.post(
@@ -63,7 +61,7 @@ class TestQuery:
         """Should handle case when no relevant chunks found."""
         with patch("app.routers.query.QueryDecomposer") as mock_decomposer_class:
             mock_decomposer = MagicMock()
-            mock_decomposer.decompose.return_value = ["test"]
+            mock_decomposer.decompose = AsyncMock(return_value=["test"])
             mock_decomposer_class.return_value = mock_decomposer
 
             with patch("app.routers.query.RAGService") as mock_rag_class:
@@ -71,12 +69,12 @@ class TestQuery:
                 mock_rag.retrieve.return_value = [
                     ("chunk one", {"filename": "test.pdf"}, 0.1),
                 ]
-                mock_rag.generate_response.return_value = "I could not find any relevant information."
+                mock_rag.generate_response = AsyncMock(return_value="I could not find any relevant information.")
                 mock_rag_class.return_value = mock_rag
 
                 with patch("app.routers.query.RelevanceFilter") as mock_filter_class:
                     mock_filter = MagicMock()
-                    mock_filter.filter.return_value = []
+                    mock_filter.filter = AsyncMock(return_value=[])
                     mock_filter_class.return_value = mock_filter
 
                     response = client.post(
diff --git a/backend/app/test/test_phase1_query_decomposer.py b/backend/app/test/test_phase1_query_decomposer.py
index 944f8c3..82cd1fa 100644
--- a/backend/app/test/test_phase1_query_decomposer.py
+++ b/backend/app/test/test_phase1_query_decomposer.py
@@ -9,52 +9,47 @@ from app.services.query_decomposer import QueryDecomposer
 
 
 class MockLLMClient:
-    """Simple mock LLM client with a fixed response."""
-
     def __init__(self, response: str):
         self._response = response
         self.last_prompt = None
 
-    def complete(self, prompt: str, temperature: float = 0.7) -> str:
+    async def complete(self, prompt: str, temperature: float = 0.7) -> str:
         self.last_prompt = prompt
         return self._response
 
 
-def test_decompose_valid_json():
+async def test_decompose_valid_json():
     llm = MockLLMClient('["alpha", "beta", "gamma"]')
     decomposer = QueryDecomposer(llm)
-    result: List[str] = decomposer.decompose("What are keywords for X?")
+    result: List[str] = await decomposer.decompose("What are keywords for X?")
     assert result == ["alpha", "beta", "gamma"]
-    # Ensure the prompt was constructed with the given question
     assert llm.last_prompt == "Given question: 'What are keywords for X?', extract key search keywords as JSON array"
 
 
-def test_decompose_empty_question_returns_empty():
+async def test_decompose_empty_question_returns_empty():
     llm = MockLLMClient('["should_not_be_used"]')
     decomposer = QueryDecomposer(llm)
-    result = decomposer.decompose("")
+    result = await decomposer.decompose("")
     assert result == []
-    # LLM should not be called for empty input
     assert llm.last_prompt is None
 
 
-def test_decompose_invalid_json_returns_empty():
+async def test_decompose_invalid_json_returns_empty():
     llm = MockLLMClient("not-json")
     decomposer = QueryDecomposer(llm)
-    result = decomposer.decompose("Question?")
+    result = await decomposer.decompose("Question?")
     assert result == []
 
 
-def test_decompose_non_list_json_returns_empty():
+async def test_decompose_non_list_json_returns_empty():
     llm = MockLLMClient("{\"a\": 1}")
     decomposer = QueryDecomposer(llm)
-    result = decomposer.decompose("Question?")
+    result = await decomposer.decompose("Question?")
     assert result == []
 
 
-def test_decompose_mixed_types_coerced_to_strings():
+async def test_decompose_mixed_types_coerced_to_strings():
     llm = MockLLMClient('["a", 2, null]')
     decomposer = QueryDecomposer(llm)
-    result = decomposer.decompose("Question?")
-    # Non-string items should be coerced to strings
+    result = await decomposer.decompose("Question?")
     assert result == ["a", "2", "None"]
diff --git a/backend/app/test/test_phase1_rag_service.py b/backend/app/test/test_phase1_rag_service.py
index 9827809..7d210ab 100644
--- a/backend/app/test/test_phase1_rag_service.py
+++ b/backend/app/test/test_phase1_rag_service.py
@@ -7,7 +7,7 @@ Covers:
 - Metadata handling per chunk
 """
 import pytest
-from unittest.mock import MagicMock, patch
+from unittest.mock import MagicMock, AsyncMock
 
 
 class TestRAGService:
@@ -96,7 +96,8 @@ class TestRAGService:
 
         assert results == []
 
-    def test_generate_response_calls_llm(self):
+    @pytest.mark.asyncio
+    async def test_generate_response_calls_llm(self):
         """Should call LLM with strict RAG prompt."""
         from app.services.rag import RAGService
 
@@ -105,14 +106,14 @@ class TestRAGService:
         mock_client.get_or_create_collection.return_value = mock_collection
 
         mock_llm = MagicMock()
-        mock_llm.complete.return_value = "- Bullet point answer"
+        mock_llm.complete = AsyncMock(return_value="- Bullet point answer")
 
         service = RAGService(chroma_client=mock_client, llm_client=mock_llm)
 
         chunks = ["relevant chunk"]
         metadata = [{"filename": "test.txt", "content_summary": "summary"}]
 
-        answer = service.generate_response("What is this?", chunks, metadata)
+        answer = await service.generate_response("What is this?", chunks, metadata)
 
         mock_llm.complete.assert_called_once()
         prompt = mock_llm.complete.call_args[1]["prompt"]
@@ -122,7 +123,8 @@ class TestRAGService:
         assert "only these document chunks" in prompt.lower()
         assert answer == "- Bullet point answer"
 
-    def test_generate_response_no_chunks(self):
+    @pytest.mark.asyncio
+    async def test_generate_response_no_chunks(self):
         """Should return fallback message when no chunks provided."""
         from app.services.rag import RAGService
 
@@ -132,6 +134,6 @@ class TestRAGService:
 
         service = RAGService(chroma_client=mock_client, llm_client=MagicMock())
 
-        answer = service.generate_response("What is this?", [], [])
+        answer = await service.generate_response("What is this?", [], [])
 
         assert "no relevant" in answer.lower() or "could not find" in answer.lower()
diff --git a/backend/app/test/test_phase1_relevance_filter.py b/backend/app/test/test_phase1_relevance_filter.py
index 826e993..93e158b 100644
--- a/backend/app/test/test_phase1_relevance_filter.py
+++ b/backend/app/test/test_phase1_relevance_filter.py
@@ -1,23 +1,8 @@
 import json
 import pytest
-from unittest.mock import MagicMock
+from unittest.mock import AsyncMock, MagicMock
 
-# Import strategy: try standard import first, fallback to path hack if needed.
-try:
-    from app.services.relevance_filter import RelevanceFilter  # type: ignore
-except Exception:
-    # Fallback: attempt to load module directly by path to avoid import issues
-    import sys
-    from pathlib import Path
-    path_to_module = Path(__file__).resolve().parents[2] / 'app' / 'services' / 'relevance_filter.py'
-    if path_to_module.exists():
-        import importlib.util
-        spec = importlib.util.spec_from_file_location("relevance_filter", str(path_to_module))
-        module = importlib.util.module_from_spec(spec)  # type: ignore
-        spec.loader.exec_module(module)  # type: ignore
-        RelevanceFilter = module.RelevanceFilter  # type: ignore
-    else:
-        raise
+from app.services.relevance_filter import RelevanceFilter
 
 
 def _make_chunks():
@@ -28,58 +13,67 @@ def _make_chunks():
     ]
 
 
-def test_filter_basic_returns_only_above_threshold():
+@pytest.mark.asyncio
+async def test_filter_basic_returns_only_above_threshold():
+    """Should keep only the chunks whose score clears the threshold, using one LLM call."""
     chunks = _make_chunks()
     llm = MagicMock()
-    llm.complete.return_value = "[8.5, 3.2, 9.0]"
+    llm.complete = AsyncMock(return_value="[8.5, 3.2, 9.0]")
 
     rf = RelevanceFilter(llm)
-    result = rf.filter("What is this about?", chunks, threshold=7.0)
+    result = await rf.filter("What is this about?", chunks, threshold=7.0)
 
     expected = [chunks[0], chunks[2]]
     assert result == expected
-    # Ensure a single batch call was made
     llm.complete.assert_called_once()
 
-    # Optional validation of prompt structure (contains the question and chunks)
     called_prompt = llm.complete.call_args[0][0]
     assert "What is this about?" in called_prompt
     for t in ["Chunk A text", "Chunk B text", "Chunk C text"]:
         assert t in called_prompt
 
 
-def test_filter_empty_chunks_returns_empty_and_no_llm_call():
+@pytest.mark.asyncio
+async def test_filter_empty_chunks_returns_empty_and_no_llm_call():
+    """Should return [] for an empty chunk list without calling the LLM."""
     llm = MagicMock()
+    llm.complete = AsyncMock()
     rf = RelevanceFilter(llm)
-    result = rf.filter("Question", [], threshold=7.0)
+    result = await rf.filter("Question", [], threshold=7.0)
     assert result == []
     llm.complete.assert_not_called()
 
 
-def test_filter_invalid_json_returns_empty():
+@pytest.mark.asyncio
+async def test_filter_invalid_json_returns_empty():
+    """Should return [] when the LLM reply is not valid JSON."""
     chunks = _make_chunks()
     llm = MagicMock()
-    llm.complete.return_value = "not json"
+    llm.complete = AsyncMock(return_value="not json")
 
     rf = RelevanceFilter(llm)
-    result = rf.filter("Question", chunks, threshold=7.0)
+    result = await rf.filter("Question", chunks, threshold=7.0)
     assert result == []
 
 
-def test_filter_length_mismatch_returns_empty():
-    chunks = _make_chunks()[:2]  # 2 chunks
+@pytest.mark.asyncio
+async def test_filter_length_mismatch_returns_empty():
+    """Should return [] when both returned scores fall below the threshold."""
+    chunks = _make_chunks()[:2]
     llm = MagicMock()
-    llm.complete.return_value = "[5, 6]"  # 2 scores, ok length, but threshold will filter all
+    llm.complete = AsyncMock(return_value="[5, 6]")
     rf = RelevanceFilter(llm)
-    result = rf.filter("Question", chunks, threshold=7.0)
+    result = await rf.filter("Question", chunks, threshold=7.0)
     # Length matches, but both below threshold -> empty
     assert result == []
 
 
-def test_filter_all_outside_threshold():
+@pytest.mark.asyncio
+async def test_filter_all_outside_threshold():
+    """Should return [] when no score reaches the threshold."""
     chunks = _make_chunks()
     llm = MagicMock()
-    llm.complete.return_value = "[1.0, 2.0, 3.0]"
+    llm.complete = AsyncMock(return_value="[1.0, 2.0, 3.0]")
     rf = RelevanceFilter(llm)
-    result = rf.filter("Question", chunks, threshold=5.0)
+    result = await rf.filter("Question", chunks, threshold=5.0)
     assert result == []