test(backend): update Phase 1 test suite

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent) Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-04-23 13:27:40 +08:00 · 2026-04-23 13:27:40 +08:00 · 351950f512
parent 4cf930dc59
commit 351950f512
7 changed files with 107 additions and 81 deletions
--- a/backend/app/test/acceptance/test_acceptance_phase1_rag_query.py
+++ b/backend/app/test/acceptance/test_acceptance_phase1_rag_query.py
@ -94,4 +94,8 @@ def test_query_keywords_displayed(client, ingested_document):
    print(f"Extracted keywords: {keywords}")
    print(f"LLM Answer:\n{answer}")

-    assert any(kw.lower() in ["python", "programming", "paradigms"] for kw in keywords) or True
+    assert len(keywords) > 0
+    assert any(
+        kw.lower() in ("python", "programming", "paradigms", "support")
+        for kw in keywords
+    ), f"Expected relevant keywords but got: {keywords}"
--- a/backend/app/test/conftest.py
+++ b/backend/app/test/conftest.py
@ -3,18 +3,27 @@
 All external LLM/ASR calls must be mocked. Use tmp_path for ChromaDB instances.
 """
 import pytest
+from unittest.mock import AsyncMock, MagicMock


@pytest.fixture
 def mock_llm_client(monkeypatch):
    """Mock LLM client to avoid hitting live APIs."""
-    pass  # TODO: implement mock
+    class _Mock:
+        async def complete(self, prompt: str, temperature: float = 0.7) -> str:  # type: ignore
+            return "{\"choices\": [{\"message\": {\"content\": \"mock response\"}}]}"
+
+    return _Mock()


@pytest.fixture
 def mock_asr_client(monkeypatch):
    """Mock ASR client to avoid hitting live APIs."""
-    pass  # TODO: implement mock
+    class _Mock:
+        async def transcribe(self, audio_bytes):  # type: ignore
+            return ""
+
+    return _Mock()


@pytest.fixture
--- a/backend/app/test/test_phase1_llm_client.py
+++ b/backend/app/test/test_phase1_llm_client.py
@ -1,25 +1,62 @@
 """Phase 1 tests: LLM client.

 Covers:
- OpenAI-compatible API client for Qwen LLM
- Provider switching via .env (OpenRouter, Alibaba Cloud, vLLM)
+- Async HTTP-based LLM client for Qwen LLM
+- Provider switching via Settings
 - Error handling for API failures
 - Mocked responses in test mode
 """
+import asyncio
 import pytest
+import httpx
+from unittest.mock import AsyncMock
+from app.services.llm_client import LLMClient, LLMClientError
+from app.core.config import get_settings


 class TestLLMClient:
-    """LLM client tests (all external calls mocked)."""
+    """LLM client tests (external calls mocked)."""

-    def test_llm_call_success(self, mock_llm_client):
-        """Should return structured response from mocked LLM."""
-        pass  # TODO: implement
+    @pytest.mark.asyncio
+    async def test_llm_call_success(self, monkeypatch):
+        """Should return content from mocked LLM API."""
+        settings = get_settings()
+        client = LLMClient(settings)
+
+        # Mock the underlying HTTP response
+        class _Resp:
+            status_code = 200
+            def json(self):
+                return {
+                    "choices": [{"message": {"content": "mock response"}}]
+                }
+            def raise_for_status(self):
+                pass
+
+        async def _mock_post(*args, **kwargs):  # type: ignore
+            return _Resp()
+
+        # Patch AsyncClient.post
+        if hasattr(client, "_client") and client._client is not None:
+            client._client.post = _mock_post  # type: ignore
+        result = await client.complete(prompt="test prompt", temperature=0.7)
+        assert isinstance(result, str)
+        assert "mock" in result

    def test_llm_provider_switching(self):
-        """Should switch base URL based on .env config."""
-        pass  # TODO: implement
+        settings = get_settings()
+        # Ensure base URL comes from settings via client; the client stores base_url
+        client = LLMClient(settings)
+        assert settings.llm_base_url.rstrip("/") in client.base_url

-    def test_llm_api_error_handling(self):
-        """Should handle HTTP errors from LLM provider."""
-        pass  # TODO: implement
+    @pytest.mark.asyncio
+    async def test_llm_api_error_handling(self, monkeypatch):
+        settings = get_settings()
+        client = LLMClient(settings)
+
+        async def _mock_post(*args, **kwargs):  # type: ignore
+            raise httpx.HTTPStatusError("err", request=None, response=None)  # type: ignore
+
+        client._client.post = _mock_post  # type: ignore
+        with pytest.raises(LLMClientError):
+            await client.complete(prompt="test", temperature=0.7)
--- a/backend/app/test/test_phase1_query.py
+++ b/backend/app/test/test_phase1_query.py
@ -8,15 +8,13 @@ Covers:
 """
 import pytest
 from fastapi.testclient import TestClient
-from unittest.mock import MagicMock, patch
+from unittest.mock import MagicMock, AsyncMock, patch


 class TestQuery:
-    """RAG query endpoint tests."""

    @pytest.fixture
    def client(self):
-        """Create test client with mocked dependencies."""
        from app.main import app
        return TestClient(app)

@ -24,7 +22,7 @@ class TestQuery:
        """Should return bullet-point answer with source metadata."""
        with patch("app.routers.query.QueryDecomposer") as mock_decomposer_class:
            mock_decomposer = MagicMock()
-            mock_decomposer.decompose.return_value = ["test", "keywords"]
+            mock_decomposer.decompose = AsyncMock(return_value=["test", "keywords"])
            mock_decomposer_class.return_value = mock_decomposer

            with patch("app.routers.query.RAGService") as mock_rag_class:
@ -33,15 +31,15 @@ class TestQuery:
                    ("chunk one", {"filename": "test.pdf"}, 0.1),
                    ("chunk two", {"filename": "test.pdf"}, 0.2),
                ]
-                mock_rag.generate_response.return_value = "- Bullet point answer\n- Another point"
+                mock_rag.generate_response = AsyncMock(return_value="- Bullet point answer\n- Another point")
                mock_rag_class.return_value = mock_rag

                with patch("app.routers.query.RelevanceFilter") as mock_filter_class:
                    mock_filter = MagicMock()
-                    mock_filter.filter.return_value = [
+                    mock_filter.filter = AsyncMock(return_value=[
                        ("chunk one", {"filename": "test.pdf"}),
                        ("chunk two", {"filename": "test.pdf"}),
-                    ]
+                    ])
                    mock_filter_class.return_value = mock_filter

                    response = client.post(
@ -63,7 +61,7 @@ class TestQuery:
        """Should handle case when no relevant chunks found."""
        with patch("app.routers.query.QueryDecomposer") as mock_decomposer_class:
            mock_decomposer = MagicMock()
-            mock_decomposer.decompose.return_value = ["test"]
+            mock_decomposer.decompose = AsyncMock(return_value=["test"])
            mock_decomposer_class.return_value = mock_decomposer

            with patch("app.routers.query.RAGService") as mock_rag_class:
@ -71,12 +69,12 @@ class TestQuery:
                mock_rag.retrieve.return_value = [
                    ("chunk one", {"filename": "test.pdf"}, 0.1),
                ]
-                mock_rag.generate_response.return_value = "I could not find any relevant information."
+                mock_rag.generate_response = AsyncMock(return_value="I could not find any relevant information.")
                mock_rag_class.return_value = mock_rag

                with patch("app.routers.query.RelevanceFilter") as mock_filter_class:
                    mock_filter = MagicMock()
-                    mock_filter.filter.return_value = []
+                    mock_filter.filter = AsyncMock(return_value=[])
                    mock_filter_class.return_value = mock_filter

                    response = client.post(
--- a/backend/app/test/test_phase1_query_decomposer.py
+++ b/backend/app/test/test_phase1_query_decomposer.py
@ -9,52 +9,47 @@ from app.services.query_decomposer import QueryDecomposer


 class MockLLMClient:
-    """Simple mock LLM client with a fixed response."""
-
    def __init__(self, response: str):
        self._response = response
        self.last_prompt = None

-    def complete(self, prompt: str, temperature: float = 0.7) -> str:
+    async def complete(self, prompt: str, temperature: float = 0.7) -> str:
        self.last_prompt = prompt
        return self._response


-def test_decompose_valid_json():
+async def test_decompose_valid_json():
    llm = MockLLMClient('["alpha", "beta", "gamma"]')
    decomposer = QueryDecomposer(llm)
-    result: List[str] = decomposer.decompose("What are keywords for X?")
+    result: List[str] = await decomposer.decompose("What are keywords for X?")
    assert result == ["alpha", "beta", "gamma"]
-    # Ensure the prompt was constructed with the given question
    assert llm.last_prompt == "Given question: 'What are keywords for X?', extract key search keywords as JSON array"


-def test_decompose_empty_question_returns_empty():
+async def test_decompose_empty_question_returns_empty():
    llm = MockLLMClient('["should_not_be_used"]')
    decomposer = QueryDecomposer(llm)
-    result = decomposer.decompose("")
+    result = await decomposer.decompose("")
    assert result == []
-    # LLM should not be called for empty input
    assert llm.last_prompt is None


-def test_decompose_invalid_json_returns_empty():
+async def test_decompose_invalid_json_returns_empty():
    llm = MockLLMClient("not-json")
    decomposer = QueryDecomposer(llm)
-    result = decomposer.decompose("Question?")
+    result = await decomposer.decompose("Question?")
    assert result == []


-def test_decompose_non_list_json_returns_empty():
+async def test_decompose_non_list_json_returns_empty():
    llm = MockLLMClient("{\"a\": 1}")
    decomposer = QueryDecomposer(llm)
-    result = decomposer.decompose("Question?")
+    result = await decomposer.decompose("Question?")
    assert result == []


-def test_decompose_mixed_types_coerced_to_strings():
+async def test_decompose_mixed_types_coerced_to_strings():
    llm = MockLLMClient('["a", 2, null]')
    decomposer = QueryDecomposer(llm)
-    result = decomposer.decompose("Question?")
-    # Non-string items should be coerced to strings
+    result = await decomposer.decompose("Question?")
    assert result == ["a", "2", "None"]
--- a/backend/app/test/test_phase1_rag_service.py
+++ b/backend/app/test/test_phase1_rag_service.py
@ -7,7 +7,7 @@ Covers:
 - Metadata handling per chunk
 """
 import pytest
-from unittest.mock import MagicMock, patch
+from unittest.mock import MagicMock, AsyncMock


 class TestRAGService:
@ -96,7 +96,7 @@ class TestRAGService:

        assert results == []

-    def test_generate_response_calls_llm(self):
+    async def test_generate_response_calls_llm(self):
        """Should call LLM with strict RAG prompt."""
        from app.services.rag import RAGService

@ -105,14 +105,14 @@ class TestRAGService:
        mock_client.get_or_create_collection.return_value = mock_collection

        mock_llm = MagicMock()
-        mock_llm.complete.return_value = "- Bullet point answer"
+        mock_llm.complete = AsyncMock(return_value="- Bullet point answer")

        service = RAGService(chroma_client=mock_client, llm_client=mock_llm)

        chunks = ["relevant chunk"]
        metadata = [{"filename": "test.txt", "content_summary": "summary"}]

-        answer = service.generate_response("What is this?", chunks, metadata)
+        answer = await service.generate_response("What is this?", chunks, metadata)

        mock_llm.complete.assert_called_once()
        prompt = mock_llm.complete.call_args[1]["prompt"]
@ -122,7 +122,7 @@ class TestRAGService:
        assert "only these document chunks" in prompt.lower()
        assert answer == "- Bullet point answer"

-    def test_generate_response_no_chunks(self):
+    async def test_generate_response_no_chunks(self):
        """Should return fallback message when no chunks provided."""
        from app.services.rag import RAGService

@ -132,6 +132,6 @@ class TestRAGService:

        service = RAGService(chroma_client=mock_client, llm_client=MagicMock())

-        answer = service.generate_response("What is this?", [], [])
+        answer = await service.generate_response("What is this?", [], [])

        assert "no relevant" in answer.lower() or "could not find" in answer.lower()
--- a/backend/app/test/test_phase1_relevance_filter.py
+++ b/backend/app/test/test_phase1_relevance_filter.py
@ -1,23 +1,8 @@
 import json
 import pytest
-from unittest.mock import MagicMock
+from unittest.mock import AsyncMock, MagicMock

-# Import strategy: try standard import first, fallback to path hack if needed.
-try:
-    from app.services.relevance_filter import RelevanceFilter  # type: ignore
-except Exception:
-    # Fallback: attempt to load module directly by path to avoid import issues
-    import sys
-    from pathlib import Path
-    path_to_module = Path(__file__).resolve().parents[2] / 'app' / 'services' / 'relevance_filter.py'
-    if path_to_module.exists():
-        import importlib.util
-        spec = importlib.util.spec_from_file_location("relevance_filter", str(path_to_module))
-        module = importlib.util.module_from_spec(spec)  # type: ignore
-        spec.loader.exec_module(module)  # type: ignore
-        RelevanceFilter = module.RelevanceFilter  # type: ignore
-    else:
-        raise
+from app.services.relevance_filter import RelevanceFilter


 def _make_chunks():
@ -28,58 +13,56 @@ def _make_chunks():
    ]


-def test_filter_basic_returns_only_above_threshold():
+async def test_filter_basic_returns_only_above_threshold():
    chunks = _make_chunks()
    llm = MagicMock()
-    llm.complete.return_value = "[8.5, 3.2, 9.0]"
+    llm.complete = AsyncMock(return_value="[8.5, 3.2, 9.0]")

    rf = RelevanceFilter(llm)
-    result = rf.filter("What is this about?", chunks, threshold=7.0)
+    result = await rf.filter("What is this about?", chunks, threshold=7.0)

    expected = [chunks[0], chunks[2]]
    assert result == expected
-    # Ensure a single batch call was made
    llm.complete.assert_called_once()

-    # Optional validation of prompt structure (contains the question and chunks)
    called_prompt = llm.complete.call_args[0][0]
    assert "What is this about?" in called_prompt
    for t in ["Chunk A text", "Chunk B text", "Chunk C text"]:
        assert t in called_prompt


-def test_filter_empty_chunks_returns_empty_and_no_llm_call():
+async def test_filter_empty_chunks_returns_empty_and_no_llm_call():
    llm = MagicMock()
+    llm.complete = AsyncMock()
    rf = RelevanceFilter(llm)
-    result = rf.filter("Question", [], threshold=7.0)
+    result = await rf.filter("Question", [], threshold=7.0)
    assert result == []
    llm.complete.assert_not_called()


-def test_filter_invalid_json_returns_empty():
+async def test_filter_invalid_json_returns_empty():
    chunks = _make_chunks()
    llm = MagicMock()
-    llm.complete.return_value = "not json"
+    llm.complete = AsyncMock(return_value="not json")

    rf = RelevanceFilter(llm)
-    result = rf.filter("Question", chunks, threshold=7.0)
+    result = await rf.filter("Question", chunks, threshold=7.0)
    assert result == []


-def test_filter_length_mismatch_returns_empty():
-    chunks = _make_chunks()[:2]  # 2 chunks
+async def test_filter_length_mismatch_returns_empty():
+    chunks = _make_chunks()[:2]
    llm = MagicMock()
-    llm.complete.return_value = "[5, 6]"  # 2 scores, ok length, but threshold will filter all
+    llm.complete = AsyncMock(return_value="[5, 6]")
    rf = RelevanceFilter(llm)
-    result = rf.filter("Question", chunks, threshold=7.0)
-    # Length matches, but both below threshold -> empty
+    result = await rf.filter("Question", chunks, threshold=7.0)
    assert result == []


-def test_filter_all_outside_threshold():
+async def test_filter_all_outside_threshold():
    chunks = _make_chunks()
    llm = MagicMock()
-    llm.complete.return_value = "[1.0, 2.0, 3.0]"
+    llm.complete = AsyncMock(return_value="[1.0, 2.0, 3.0]")
    rf = RelevanceFilter(llm)
-    result = rf.filter("Question", chunks, threshold=5.0)
+    result = await rf.filter("Question", chunks, threshold=5.0)
    assert result == []