test(backend): update Phase 1 test suite

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
Woody 2026-04-23 13:27:40 +08:00
parent 4cf930dc59
commit 351950f512
7 changed files with 107 additions and 81 deletions

View File

@@ -94,4 +94,8 @@ def test_query_keywords_displayed(client, ingested_document):
print(f"Extracted keywords: {keywords}") print(f"Extracted keywords: {keywords}")
print(f"LLM Answer:\n{answer}") print(f"LLM Answer:\n{answer}")
assert any(kw.lower() in ["python", "programming", "paradigms"] for kw in keywords) or True assert len(keywords) > 0
assert any(
kw.lower() in ("python", "programming", "paradigms", "support")
for kw in keywords
), f"Expected relevant keywords but got: {keywords}"

View File

@@ -3,18 +3,27 @@
All external LLM/ASR calls must be mocked. Use tmp_path for ChromaDB instances. All external LLM/ASR calls must be mocked. Use tmp_path for ChromaDB instances.
""" """
import pytest import pytest
from unittest.mock import AsyncMock, MagicMock
@pytest.fixture @pytest.fixture
def mock_llm_client(monkeypatch): def mock_llm_client(monkeypatch):
"""Mock LLM client to avoid hitting live APIs.""" """Mock LLM client to avoid hitting live APIs."""
pass # TODO: implement mock class _Mock:
async def complete(self, prompt: str, temperature: float = 0.7) -> str: # type: ignore
return "{\"choices\": [{\"message\": {\"content\": \"mock response\"}}]}"
return _Mock()
@pytest.fixture @pytest.fixture
def mock_asr_client(monkeypatch): def mock_asr_client(monkeypatch):
"""Mock ASR client to avoid hitting live APIs.""" """Mock ASR client to avoid hitting live APIs."""
pass # TODO: implement mock class _Mock:
async def transcribe(self, audio_bytes): # type: ignore
return ""
return _Mock()
@pytest.fixture @pytest.fixture

View File

@@ -1,25 +1,62 @@
"""Phase 1 tests: LLM client. """Phase 1 tests: LLM client.
Covers: Covers:
- OpenAI-compatible API client for Qwen LLM - Async HTTP-based LLM client for Qwen LLM
- Provider switching via .env (OpenRouter, Alibaba Cloud, vLLM) - Provider switching via Settings
- Error handling for API failures - Error handling for API failures
- Mocked responses in test mode - Mocked responses in test mode
""" """
import asyncio
import pytest import pytest
import httpx
from unittest.mock import AsyncMock
from app.services.llm_client import LLMClient, LLMClientError
from app.core.config import get_settings
class TestLLMClient: class TestLLMClient:
"""LLM client tests (all external calls mocked).""" """LLM client tests (external calls mocked)."""
def test_llm_call_success(self, mock_llm_client): @pytest.mark.asyncio
"""Should return structured response from mocked LLM.""" async def test_llm_call_success(self, monkeypatch):
pass # TODO: implement """Should return content from mocked LLM API."""
settings = get_settings()
client = LLMClient(settings)
# Mock the underlying HTTP response
class _Resp:
status_code = 200
def json(self):
return {
"choices": [{"message": {"content": "mock response"}}]
}
def raise_for_status(self):
pass
async def _mock_post(*args, **kwargs): # type: ignore
return _Resp()
# Patch AsyncClient.post
if hasattr(client, "_client") and client._client is not None:
client._client.post = _mock_post # type: ignore
result = await client.complete(prompt="test prompt", temperature=0.7)
assert isinstance(result, str)
assert "mock" in result
def test_llm_provider_switching(self): def test_llm_provider_switching(self):
"""Should switch base URL based on .env config.""" settings = get_settings()
pass # TODO: implement # Ensure base URL comes from settings via client; the client stores base_url
client = LLMClient(settings)
assert settings.llm_base_url.rstrip("/") in client.base_url
def test_llm_api_error_handling(self): @pytest.mark.asyncio
"""Should handle HTTP errors from LLM provider.""" async def test_llm_api_error_handling(self, monkeypatch):
pass # TODO: implement settings = get_settings()
client = LLMClient(settings)
async def _mock_post(*args, **kwargs): # type: ignore
raise httpx.HTTPStatusError("err", request=None, response=None) # type: ignore
client._client.post = _mock_post # type: ignore
with pytest.raises(LLMClientError):
await client.complete(prompt="test", temperature=0.7)

View File

@@ -8,15 +8,13 @@ Covers:
""" """
import pytest import pytest
from fastapi.testclient import TestClient from fastapi.testclient import TestClient
from unittest.mock import MagicMock, patch from unittest.mock import MagicMock, AsyncMock, patch
class TestQuery: class TestQuery:
"""RAG query endpoint tests."""
@pytest.fixture @pytest.fixture
def client(self): def client(self):
"""Create test client with mocked dependencies."""
from app.main import app from app.main import app
return TestClient(app) return TestClient(app)
@@ -24,7 +22,7 @@ class TestQuery:
"""Should return bullet-point answer with source metadata.""" """Should return bullet-point answer with source metadata."""
with patch("app.routers.query.QueryDecomposer") as mock_decomposer_class: with patch("app.routers.query.QueryDecomposer") as mock_decomposer_class:
mock_decomposer = MagicMock() mock_decomposer = MagicMock()
mock_decomposer.decompose.return_value = ["test", "keywords"] mock_decomposer.decompose = AsyncMock(return_value=["test", "keywords"])
mock_decomposer_class.return_value = mock_decomposer mock_decomposer_class.return_value = mock_decomposer
with patch("app.routers.query.RAGService") as mock_rag_class: with patch("app.routers.query.RAGService") as mock_rag_class:
@@ -33,15 +31,15 @@ class TestQuery:
("chunk one", {"filename": "test.pdf"}, 0.1), ("chunk one", {"filename": "test.pdf"}, 0.1),
("chunk two", {"filename": "test.pdf"}, 0.2), ("chunk two", {"filename": "test.pdf"}, 0.2),
] ]
mock_rag.generate_response.return_value = "- Bullet point answer\n- Another point" mock_rag.generate_response = AsyncMock(return_value="- Bullet point answer\n- Another point")
mock_rag_class.return_value = mock_rag mock_rag_class.return_value = mock_rag
with patch("app.routers.query.RelevanceFilter") as mock_filter_class: with patch("app.routers.query.RelevanceFilter") as mock_filter_class:
mock_filter = MagicMock() mock_filter = MagicMock()
mock_filter.filter.return_value = [ mock_filter.filter = AsyncMock(return_value=[
("chunk one", {"filename": "test.pdf"}), ("chunk one", {"filename": "test.pdf"}),
("chunk two", {"filename": "test.pdf"}), ("chunk two", {"filename": "test.pdf"}),
] ])
mock_filter_class.return_value = mock_filter mock_filter_class.return_value = mock_filter
response = client.post( response = client.post(
@@ -63,7 +61,7 @@ class TestQuery:
"""Should handle case when no relevant chunks found.""" """Should handle case when no relevant chunks found."""
with patch("app.routers.query.QueryDecomposer") as mock_decomposer_class: with patch("app.routers.query.QueryDecomposer") as mock_decomposer_class:
mock_decomposer = MagicMock() mock_decomposer = MagicMock()
mock_decomposer.decompose.return_value = ["test"] mock_decomposer.decompose = AsyncMock(return_value=["test"])
mock_decomposer_class.return_value = mock_decomposer mock_decomposer_class.return_value = mock_decomposer
with patch("app.routers.query.RAGService") as mock_rag_class: with patch("app.routers.query.RAGService") as mock_rag_class:
@@ -71,12 +69,12 @@ class TestQuery:
mock_rag.retrieve.return_value = [ mock_rag.retrieve.return_value = [
("chunk one", {"filename": "test.pdf"}, 0.1), ("chunk one", {"filename": "test.pdf"}, 0.1),
] ]
mock_rag.generate_response.return_value = "I could not find any relevant information." mock_rag.generate_response = AsyncMock(return_value="I could not find any relevant information.")
mock_rag_class.return_value = mock_rag mock_rag_class.return_value = mock_rag
with patch("app.routers.query.RelevanceFilter") as mock_filter_class: with patch("app.routers.query.RelevanceFilter") as mock_filter_class:
mock_filter = MagicMock() mock_filter = MagicMock()
mock_filter.filter.return_value = [] mock_filter.filter = AsyncMock(return_value=[])
mock_filter_class.return_value = mock_filter mock_filter_class.return_value = mock_filter
response = client.post( response = client.post(

View File

@@ -9,52 +9,47 @@ from app.services.query_decomposer import QueryDecomposer
class MockLLMClient: class MockLLMClient:
"""Simple mock LLM client with a fixed response."""
def __init__(self, response: str): def __init__(self, response: str):
self._response = response self._response = response
self.last_prompt = None self.last_prompt = None
def complete(self, prompt: str, temperature: float = 0.7) -> str: async def complete(self, prompt: str, temperature: float = 0.7) -> str:
self.last_prompt = prompt self.last_prompt = prompt
return self._response return self._response
def test_decompose_valid_json(): async def test_decompose_valid_json():
llm = MockLLMClient('["alpha", "beta", "gamma"]') llm = MockLLMClient('["alpha", "beta", "gamma"]')
decomposer = QueryDecomposer(llm) decomposer = QueryDecomposer(llm)
result: List[str] = decomposer.decompose("What are keywords for X?") result: List[str] = await decomposer.decompose("What are keywords for X?")
assert result == ["alpha", "beta", "gamma"] assert result == ["alpha", "beta", "gamma"]
# Ensure the prompt was constructed with the given question
assert llm.last_prompt == "Given question: 'What are keywords for X?', extract key search keywords as JSON array" assert llm.last_prompt == "Given question: 'What are keywords for X?', extract key search keywords as JSON array"
def test_decompose_empty_question_returns_empty(): async def test_decompose_empty_question_returns_empty():
llm = MockLLMClient('["should_not_be_used"]') llm = MockLLMClient('["should_not_be_used"]')
decomposer = QueryDecomposer(llm) decomposer = QueryDecomposer(llm)
result = decomposer.decompose("") result = await decomposer.decompose("")
assert result == [] assert result == []
# LLM should not be called for empty input
assert llm.last_prompt is None assert llm.last_prompt is None
def test_decompose_invalid_json_returns_empty(): async def test_decompose_invalid_json_returns_empty():
llm = MockLLMClient("not-json") llm = MockLLMClient("not-json")
decomposer = QueryDecomposer(llm) decomposer = QueryDecomposer(llm)
result = decomposer.decompose("Question?") result = await decomposer.decompose("Question?")
assert result == [] assert result == []
def test_decompose_non_list_json_returns_empty(): async def test_decompose_non_list_json_returns_empty():
llm = MockLLMClient("{\"a\": 1}") llm = MockLLMClient("{\"a\": 1}")
decomposer = QueryDecomposer(llm) decomposer = QueryDecomposer(llm)
result = decomposer.decompose("Question?") result = await decomposer.decompose("Question?")
assert result == [] assert result == []
def test_decompose_mixed_types_coerced_to_strings(): async def test_decompose_mixed_types_coerced_to_strings():
llm = MockLLMClient('["a", 2, null]') llm = MockLLMClient('["a", 2, null]')
decomposer = QueryDecomposer(llm) decomposer = QueryDecomposer(llm)
result = decomposer.decompose("Question?") result = await decomposer.decompose("Question?")
# Non-string items should be coerced to strings
assert result == ["a", "2", "None"] assert result == ["a", "2", "None"]

View File

@@ -7,7 +7,7 @@ Covers:
- Metadata handling per chunk - Metadata handling per chunk
""" """
import pytest import pytest
from unittest.mock import MagicMock, patch from unittest.mock import MagicMock, AsyncMock
class TestRAGService: class TestRAGService:
@@ -96,7 +96,7 @@ class TestRAGService:
assert results == [] assert results == []
def test_generate_response_calls_llm(self): async def test_generate_response_calls_llm(self):
"""Should call LLM with strict RAG prompt.""" """Should call LLM with strict RAG prompt."""
from app.services.rag import RAGService from app.services.rag import RAGService
@@ -105,14 +105,14 @@ class TestRAGService:
mock_client.get_or_create_collection.return_value = mock_collection mock_client.get_or_create_collection.return_value = mock_collection
mock_llm = MagicMock() mock_llm = MagicMock()
mock_llm.complete.return_value = "- Bullet point answer" mock_llm.complete = AsyncMock(return_value="- Bullet point answer")
service = RAGService(chroma_client=mock_client, llm_client=mock_llm) service = RAGService(chroma_client=mock_client, llm_client=mock_llm)
chunks = ["relevant chunk"] chunks = ["relevant chunk"]
metadata = [{"filename": "test.txt", "content_summary": "summary"}] metadata = [{"filename": "test.txt", "content_summary": "summary"}]
answer = service.generate_response("What is this?", chunks, metadata) answer = await service.generate_response("What is this?", chunks, metadata)
mock_llm.complete.assert_called_once() mock_llm.complete.assert_called_once()
prompt = mock_llm.complete.call_args[1]["prompt"] prompt = mock_llm.complete.call_args[1]["prompt"]
@@ -122,7 +122,7 @@ class TestRAGService:
assert "only these document chunks" in prompt.lower() assert "only these document chunks" in prompt.lower()
assert answer == "- Bullet point answer" assert answer == "- Bullet point answer"
def test_generate_response_no_chunks(self): async def test_generate_response_no_chunks(self):
"""Should return fallback message when no chunks provided.""" """Should return fallback message when no chunks provided."""
from app.services.rag import RAGService from app.services.rag import RAGService
@@ -132,6 +132,6 @@ class TestRAGService:
service = RAGService(chroma_client=mock_client, llm_client=MagicMock()) service = RAGService(chroma_client=mock_client, llm_client=MagicMock())
answer = service.generate_response("What is this?", [], []) answer = await service.generate_response("What is this?", [], [])
assert "no relevant" in answer.lower() or "could not find" in answer.lower() assert "no relevant" in answer.lower() or "could not find" in answer.lower()

View File

@@ -1,23 +1,8 @@
import json import json
import pytest import pytest
from unittest.mock import MagicMock from unittest.mock import AsyncMock, MagicMock
# Import strategy: try standard import first, fallback to path hack if needed. from app.services.relevance_filter import RelevanceFilter
try:
from app.services.relevance_filter import RelevanceFilter # type: ignore
except Exception:
# Fallback: attempt to load module directly by path to avoid import issues
import sys
from pathlib import Path
path_to_module = Path(__file__).resolve().parents[2] / 'app' / 'services' / 'relevance_filter.py'
if path_to_module.exists():
import importlib.util
spec = importlib.util.spec_from_file_location("relevance_filter", str(path_to_module))
module = importlib.util.module_from_spec(spec) # type: ignore
spec.loader.exec_module(module) # type: ignore
RelevanceFilter = module.RelevanceFilter # type: ignore
else:
raise
def _make_chunks(): def _make_chunks():
@@ -28,58 +13,56 @@ def _make_chunks():
] ]
def test_filter_basic_returns_only_above_threshold(): async def test_filter_basic_returns_only_above_threshold():
chunks = _make_chunks() chunks = _make_chunks()
llm = MagicMock() llm = MagicMock()
llm.complete.return_value = "[8.5, 3.2, 9.0]" llm.complete = AsyncMock(return_value="[8.5, 3.2, 9.0]")
rf = RelevanceFilter(llm) rf = RelevanceFilter(llm)
result = rf.filter("What is this about?", chunks, threshold=7.0) result = await rf.filter("What is this about?", chunks, threshold=7.0)
expected = [chunks[0], chunks[2]] expected = [chunks[0], chunks[2]]
assert result == expected assert result == expected
# Ensure a single batch call was made
llm.complete.assert_called_once() llm.complete.assert_called_once()
# Optional validation of prompt structure (contains the question and chunks)
called_prompt = llm.complete.call_args[0][0] called_prompt = llm.complete.call_args[0][0]
assert "What is this about?" in called_prompt assert "What is this about?" in called_prompt
for t in ["Chunk A text", "Chunk B text", "Chunk C text"]: for t in ["Chunk A text", "Chunk B text", "Chunk C text"]:
assert t in called_prompt assert t in called_prompt
def test_filter_empty_chunks_returns_empty_and_no_llm_call(): async def test_filter_empty_chunks_returns_empty_and_no_llm_call():
llm = MagicMock() llm = MagicMock()
llm.complete = AsyncMock()
rf = RelevanceFilter(llm) rf = RelevanceFilter(llm)
result = rf.filter("Question", [], threshold=7.0) result = await rf.filter("Question", [], threshold=7.0)
assert result == [] assert result == []
llm.complete.assert_not_called() llm.complete.assert_not_called()
def test_filter_invalid_json_returns_empty(): async def test_filter_invalid_json_returns_empty():
chunks = _make_chunks() chunks = _make_chunks()
llm = MagicMock() llm = MagicMock()
llm.complete.return_value = "not json" llm.complete = AsyncMock(return_value="not json")
rf = RelevanceFilter(llm) rf = RelevanceFilter(llm)
result = rf.filter("Question", chunks, threshold=7.0) result = await rf.filter("Question", chunks, threshold=7.0)
assert result == [] assert result == []
def test_filter_length_mismatch_returns_empty(): async def test_filter_length_mismatch_returns_empty():
chunks = _make_chunks()[:2] # 2 chunks chunks = _make_chunks()[:2]
llm = MagicMock() llm = MagicMock()
llm.complete.return_value = "[5, 6]" # 2 scores, ok length, but threshold will filter all llm.complete = AsyncMock(return_value="[5, 6]")
rf = RelevanceFilter(llm) rf = RelevanceFilter(llm)
result = rf.filter("Question", chunks, threshold=7.0) result = await rf.filter("Question", chunks, threshold=7.0)
# Length matches, but both below threshold -> empty
assert result == [] assert result == []
def test_filter_all_outside_threshold(): async def test_filter_all_outside_threshold():
chunks = _make_chunks() chunks = _make_chunks()
llm = MagicMock() llm = MagicMock()
llm.complete.return_value = "[1.0, 2.0, 3.0]" llm.complete = AsyncMock(return_value="[1.0, 2.0, 3.0]")
rf = RelevanceFilter(llm) rf = RelevanceFilter(llm)
result = rf.filter("Question", chunks, threshold=5.0) result = await rf.filter("Question", chunks, threshold=5.0)
assert result == [] assert result == []