test(backend): update Phase 1 test suite
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
parent
4cf930dc59
commit
351950f512
|
|
@ -94,4 +94,8 @@ def test_query_keywords_displayed(client, ingested_document):
|
|||
print(f"Extracted keywords: {keywords}")
|
||||
print(f"LLM Answer:\n{answer}")
|
||||
|
||||
assert any(kw.lower() in ["python", "programming", "paradigms"] for kw in keywords) or True
|
||||
assert len(keywords) > 0
|
||||
assert any(
|
||||
kw.lower() in ("python", "programming", "paradigms", "support")
|
||||
for kw in keywords
|
||||
), f"Expected relevant keywords but got: {keywords}"
|
||||
|
|
|
|||
|
|
@ -3,18 +3,27 @@
|
|||
All external LLM/ASR calls must be mocked. Use tmp_path for ChromaDB instances.
|
||||
"""
|
||||
import pytest
|
||||
from unittest.mock import AsyncMock, MagicMock
|
||||
|
||||
|
||||
@pytest.fixture
def mock_llm_client(monkeypatch):
    """Provide a stand-in LLM client so tests never reach a live API.

    The returned object exposes an async ``complete(prompt, temperature)``
    method that ignores its arguments and always yields the same canned
    string. ``monkeypatch`` is requested but not used by the fixture body.

    NOTE(review): the canned string is a JSON-encoded chat-completion
    envelope rather than bare message content -- confirm that this is what
    consumers of the fixture expect.
    """

    class _Mock:
        async def complete(self, prompt: str, temperature: float = 0.7) -> str:  # type: ignore
            return "{\"choices\": [{\"message\": {\"content\": \"mock response\"}}]}"

    return _Mock()
|
||||
|
||||
|
||||
@pytest.fixture
def mock_asr_client(monkeypatch):
    """Provide a stand-in ASR client so tests never reach a live API.

    The returned object exposes an async ``transcribe(audio_bytes)`` method
    that ignores its input and always yields an empty string.
    ``monkeypatch`` is requested but not used by the fixture body.
    """

    class _Mock:
        async def transcribe(self, audio_bytes):  # type: ignore
            return ""

    return _Mock()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
|
|
|
|||
|
|
@ -1,25 +1,62 @@
|
|||
"""Phase 1 tests: LLM client.
|
||||
|
||||
Covers:
|
||||
- OpenAI-compatible API client for Qwen LLM
|
||||
- Provider switching via .env (OpenRouter, Alibaba Cloud, vLLM)
|
||||
- Async HTTP-based LLM client for Qwen LLM
|
||||
- Provider switching via Settings
|
||||
- Error handling for API failures
|
||||
- Mocked responses in test mode
|
||||
"""
|
||||
import asyncio
|
||||
import pytest
|
||||
import httpx
|
||||
from unittest.mock import AsyncMock
|
||||
from app.services.llm_client import LLMClient, LLMClientError
|
||||
from app.core.config import get_settings
|
||||
|
||||
|
||||
class TestLLMClient:
    """LLM client tests (external calls mocked)."""

    @pytest.mark.asyncio
    async def test_llm_call_success(self, monkeypatch):
        """Should return content from mocked LLM API."""
        settings = get_settings()
        client = LLMClient(settings)

        class _Resp:
            """Minimal stand-in for the HTTP response the client reads."""

            status_code = 200

            def json(self):
                return {
                    "choices": [{"message": {"content": "mock response"}}]
                }

            def raise_for_status(self):
                pass

        async def _mock_post(*args, **kwargs):  # type: ignore
            return _Resp()

        # Patch unconditionally. A guarded patch would silently fall through
        # to the real HTTP client if ``_client`` were renamed or unset;
        # ``monkeypatch.setattr`` fails loudly instead and undoes the patch
        # at teardown.
        monkeypatch.setattr(client._client, "post", _mock_post)

        result = await client.complete(prompt="test prompt", temperature=0.7)
        assert isinstance(result, str)
        assert "mock" in result

    def test_llm_provider_switching(self):
        """Should take its base URL from the active settings."""
        settings = get_settings()
        # The client stores the configured base URL; a trailing slash on the
        # setting is ignored for the comparison.
        client = LLMClient(settings)
        assert settings.llm_base_url.rstrip("/") in client.base_url

    @pytest.mark.asyncio
    async def test_llm_api_error_handling(self, monkeypatch):
        """Should raise LLMClientError when the HTTP call fails with a status error."""
        settings = get_settings()
        client = LLMClient(settings)

        # Build a real request/response pair so that an error handler which
        # inspects ``exc.response`` or ``exc.request`` does not trip over None.
        request = httpx.Request("POST", "http://llm.invalid/chat/completions")
        response = httpx.Response(500, request=request)

        async def _mock_post(*args, **kwargs):  # type: ignore
            raise httpx.HTTPStatusError("err", request=request, response=response)

        monkeypatch.setattr(client._client, "post", _mock_post)
        with pytest.raises(LLMClientError):
            await client.complete(prompt="test", temperature=0.7)
|
||||
|
|
|
|||
|
|
@ -8,15 +8,13 @@ Covers:
|
|||
"""
|
||||
import pytest
|
||||
from fastapi.testclient import TestClient
|
||||
from unittest.mock import MagicMock, patch
|
||||
from unittest.mock import MagicMock, AsyncMock, patch
|
||||
|
||||
|
||||
class TestQuery:
|
||||
"""RAG query endpoint tests."""
|
||||
|
||||
@pytest.fixture
|
||||
def client(self):
|
||||
"""Create test client with mocked dependencies."""
|
||||
from app.main import app
|
||||
return TestClient(app)
|
||||
|
||||
|
|
@ -24,7 +22,7 @@ class TestQuery:
|
|||
"""Should return bullet-point answer with source metadata."""
|
||||
with patch("app.routers.query.QueryDecomposer") as mock_decomposer_class:
|
||||
mock_decomposer = MagicMock()
|
||||
mock_decomposer.decompose.return_value = ["test", "keywords"]
|
||||
mock_decomposer.decompose = AsyncMock(return_value=["test", "keywords"])
|
||||
mock_decomposer_class.return_value = mock_decomposer
|
||||
|
||||
with patch("app.routers.query.RAGService") as mock_rag_class:
|
||||
|
|
@ -33,15 +31,15 @@ class TestQuery:
|
|||
("chunk one", {"filename": "test.pdf"}, 0.1),
|
||||
("chunk two", {"filename": "test.pdf"}, 0.2),
|
||||
]
|
||||
mock_rag.generate_response.return_value = "- Bullet point answer\n- Another point"
|
||||
mock_rag.generate_response = AsyncMock(return_value="- Bullet point answer\n- Another point")
|
||||
mock_rag_class.return_value = mock_rag
|
||||
|
||||
with patch("app.routers.query.RelevanceFilter") as mock_filter_class:
|
||||
mock_filter = MagicMock()
|
||||
mock_filter.filter.return_value = [
|
||||
mock_filter.filter = AsyncMock(return_value=[
|
||||
("chunk one", {"filename": "test.pdf"}),
|
||||
("chunk two", {"filename": "test.pdf"}),
|
||||
]
|
||||
])
|
||||
mock_filter_class.return_value = mock_filter
|
||||
|
||||
response = client.post(
|
||||
|
|
@ -63,7 +61,7 @@ class TestQuery:
|
|||
"""Should handle case when no relevant chunks found."""
|
||||
with patch("app.routers.query.QueryDecomposer") as mock_decomposer_class:
|
||||
mock_decomposer = MagicMock()
|
||||
mock_decomposer.decompose.return_value = ["test"]
|
||||
mock_decomposer.decompose = AsyncMock(return_value=["test"])
|
||||
mock_decomposer_class.return_value = mock_decomposer
|
||||
|
||||
with patch("app.routers.query.RAGService") as mock_rag_class:
|
||||
|
|
@ -71,12 +69,12 @@ class TestQuery:
|
|||
mock_rag.retrieve.return_value = [
|
||||
("chunk one", {"filename": "test.pdf"}, 0.1),
|
||||
]
|
||||
mock_rag.generate_response.return_value = "I could not find any relevant information."
|
||||
mock_rag.generate_response = AsyncMock(return_value="I could not find any relevant information.")
|
||||
mock_rag_class.return_value = mock_rag
|
||||
|
||||
with patch("app.routers.query.RelevanceFilter") as mock_filter_class:
|
||||
mock_filter = MagicMock()
|
||||
mock_filter.filter.return_value = []
|
||||
mock_filter.filter = AsyncMock(return_value=[])
|
||||
mock_filter_class.return_value = mock_filter
|
||||
|
||||
response = client.post(
|
||||
|
|
|
|||
|
|
@ -9,52 +9,47 @@ from app.services.query_decomposer import QueryDecomposer
|
|||
|
||||
|
||||
class MockLLMClient:
    """Simple mock LLM client with a fixed response.

    ``complete`` is a coroutine, so callers must await it. The most recent
    prompt is kept in ``last_prompt`` (``None`` until the first call) so
    tests can check what was sent, or that nothing was sent.
    """

    def __init__(self, response: str):
        self._response = response
        self.last_prompt = None

    async def complete(self, prompt: str, temperature: float = 0.7) -> str:
        """Record ``prompt`` and return the fixed response; ``temperature`` is ignored."""
        self.last_prompt = prompt
        return self._response
|
||||
|
||||
|
||||
async def test_decompose_valid_json():
    """Expect a JSON-array reply to come back as the list of keywords.

    NOTE(review): no asyncio marker is applied; this presumably relies on
    pytest-asyncio running in auto mode -- confirm against the project config.
    """
    llm = MockLLMClient('["alpha", "beta", "gamma"]')
    decomposer = QueryDecomposer(llm)

    result: List[str] = await decomposer.decompose("What are keywords for X?")

    assert result == ["alpha", "beta", "gamma"]
    # Ensure the prompt was constructed with the given question
    assert llm.last_prompt == "Given question: 'What are keywords for X?', extract key search keywords as JSON array"
|
||||
|
||||
|
||||
async def test_decompose_empty_question_returns_empty():
    """Expect an empty question to yield ``[]`` without calling the LLM.

    NOTE(review): no asyncio marker is applied; this presumably relies on
    pytest-asyncio running in auto mode -- confirm against the project config.
    """
    llm = MockLLMClient('["should_not_be_used"]')
    decomposer = QueryDecomposer(llm)

    result = await decomposer.decompose("")

    assert result == []
    # LLM should not be called for empty input
    assert llm.last_prompt is None
|
||||
|
||||
|
||||
async def test_decompose_invalid_json_returns_empty():
    """Expect a reply that is not valid JSON to yield ``[]``.

    NOTE(review): no asyncio marker is applied; this presumably relies on
    pytest-asyncio running in auto mode -- confirm against the project config.
    """
    llm = MockLLMClient("not-json")
    decomposer = QueryDecomposer(llm)

    result = await decomposer.decompose("Question?")

    assert result == []
|
||||
|
||||
|
||||
async def test_decompose_non_list_json_returns_empty():
    """Expect valid JSON that is not an array (here an object) to yield ``[]``.

    NOTE(review): no asyncio marker is applied; this presumably relies on
    pytest-asyncio running in auto mode -- confirm against the project config.
    """
    llm = MockLLMClient("{\"a\": 1}")
    decomposer = QueryDecomposer(llm)

    result = await decomposer.decompose("Question?")

    assert result == []
|
||||
|
||||
|
||||
async def test_decompose_mixed_types_coerced_to_strings():
    """Expect non-string array items to come back as strings.

    The JSON ``null`` is expected as the string ``"None"`` and the number
    ``2`` as ``"2"``.

    NOTE(review): no asyncio marker is applied; this presumably relies on
    pytest-asyncio running in auto mode -- confirm against the project config.
    """
    llm = MockLLMClient('["a", 2, null]')
    decomposer = QueryDecomposer(llm)

    result = await decomposer.decompose("Question?")

    assert result == ["a", "2", "None"]
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ Covers:
|
|||
- Metadata handling per chunk
|
||||
"""
|
||||
import pytest
|
||||
from unittest.mock import MagicMock, patch
|
||||
from unittest.mock import MagicMock, AsyncMock
|
||||
|
||||
|
||||
class TestRAGService:
|
||||
|
|
@ -96,7 +96,7 @@ class TestRAGService:
|
|||
|
||||
assert results == []
|
||||
|
||||
def test_generate_response_calls_llm(self):
|
||||
async def test_generate_response_calls_llm(self):
|
||||
"""Should call LLM with strict RAG prompt."""
|
||||
from app.services.rag import RAGService
|
||||
|
||||
|
|
@ -105,14 +105,14 @@ class TestRAGService:
|
|||
mock_client.get_or_create_collection.return_value = mock_collection
|
||||
|
||||
mock_llm = MagicMock()
|
||||
mock_llm.complete.return_value = "- Bullet point answer"
|
||||
mock_llm.complete = AsyncMock(return_value="- Bullet point answer")
|
||||
|
||||
service = RAGService(chroma_client=mock_client, llm_client=mock_llm)
|
||||
|
||||
chunks = ["relevant chunk"]
|
||||
metadata = [{"filename": "test.txt", "content_summary": "summary"}]
|
||||
|
||||
answer = service.generate_response("What is this?", chunks, metadata)
|
||||
answer = await service.generate_response("What is this?", chunks, metadata)
|
||||
|
||||
mock_llm.complete.assert_called_once()
|
||||
prompt = mock_llm.complete.call_args[1]["prompt"]
|
||||
|
|
@ -122,7 +122,7 @@ class TestRAGService:
|
|||
assert "only these document chunks" in prompt.lower()
|
||||
assert answer == "- Bullet point answer"
|
||||
|
||||
def test_generate_response_no_chunks(self):
|
||||
async def test_generate_response_no_chunks(self):
|
||||
"""Should return fallback message when no chunks provided."""
|
||||
from app.services.rag import RAGService
|
||||
|
||||
|
|
@ -132,6 +132,6 @@ class TestRAGService:
|
|||
|
||||
service = RAGService(chroma_client=mock_client, llm_client=MagicMock())
|
||||
|
||||
answer = service.generate_response("What is this?", [], [])
|
||||
answer = await service.generate_response("What is this?", [], [])
|
||||
|
||||
assert "no relevant" in answer.lower() or "could not find" in answer.lower()
|
||||
|
|
|
|||
|
|
@ -1,23 +1,8 @@
|
|||
import json
|
||||
import pytest
|
||||
from unittest.mock import MagicMock
|
||||
from unittest.mock import AsyncMock, MagicMock
|
||||
|
||||
# Import strategy: try standard import first, fallback to path hack if needed.
|
||||
try:
|
||||
from app.services.relevance_filter import RelevanceFilter # type: ignore
|
||||
except Exception:
|
||||
# Fallback: attempt to load module directly by path to avoid import issues
|
||||
import sys
|
||||
from pathlib import Path
|
||||
path_to_module = Path(__file__).resolve().parents[2] / 'app' / 'services' / 'relevance_filter.py'
|
||||
if path_to_module.exists():
|
||||
import importlib.util
|
||||
spec = importlib.util.spec_from_file_location("relevance_filter", str(path_to_module))
|
||||
module = importlib.util.module_from_spec(spec) # type: ignore
|
||||
spec.loader.exec_module(module) # type: ignore
|
||||
RelevanceFilter = module.RelevanceFilter # type: ignore
|
||||
else:
|
||||
raise
|
||||
from app.services.relevance_filter import RelevanceFilter
|
||||
|
||||
|
||||
def _make_chunks():
|
||||
|
|
@ -28,58 +13,56 @@ def _make_chunks():
|
|||
]
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_filter_basic_returns_only_above_threshold():
    """Expect only the chunks scored above the threshold, from one LLM call.

    The mocked LLM scores the three chunks 8.5, 3.2 and 9.0; with a threshold
    of 7.0 the first and third chunks are expected back, in order.
    """
    chunks = _make_chunks()
    llm = MagicMock()
    llm.complete = AsyncMock(return_value="[8.5, 3.2, 9.0]")

    rf = RelevanceFilter(llm)
    result = await rf.filter("What is this about?", chunks, threshold=7.0)

    expected = [chunks[0], chunks[2]]
    assert result == expected
    # All chunks must be scored in a single batched call.
    llm.complete.assert_called_once()

    # The prompt (first positional argument) must carry the question and
    # every chunk's text.
    called_prompt = llm.complete.call_args[0][0]
    assert "What is this about?" in called_prompt
    for t in ["Chunk A text", "Chunk B text", "Chunk C text"]:
        assert t in called_prompt
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_filter_empty_chunks_returns_empty_and_no_llm_call():
    """Expect an empty chunk list to yield ``[]`` without calling the LLM."""
    llm = MagicMock()
    llm.complete = AsyncMock()

    rf = RelevanceFilter(llm)
    result = await rf.filter("Question", [], threshold=7.0)

    assert result == []
    llm.complete.assert_not_called()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_filter_invalid_json_returns_empty():
    """Expect an LLM reply that is not valid JSON to yield ``[]``."""
    chunks = _make_chunks()
    llm = MagicMock()
    llm.complete = AsyncMock(return_value="not json")

    rf = RelevanceFilter(llm)
    result = await rf.filter("Question", chunks, threshold=7.0)

    assert result == []
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_filter_length_mismatch_returns_empty():
    """Expect ``[]`` when two chunks are both scored below the threshold.

    NOTE(review): despite the name, this does not exercise a length
    mismatch -- two chunks receive two scores (5 and 6), and the empty
    result comes from both being below the 7.0 threshold. To cover the
    mismatch path, return a score list whose length differs from the chunk
    count and whose values exceed the threshold; confirm the intended
    RelevanceFilter behaviour before changing the data.
    """
    chunks = _make_chunks()[:2]
    llm = MagicMock()
    llm.complete = AsyncMock(return_value="[5, 6]")

    rf = RelevanceFilter(llm)
    result = await rf.filter("Question", chunks, threshold=7.0)

    assert result == []
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_filter_all_outside_threshold():
    """Expect ``[]`` when every score (1.0, 2.0, 3.0) is below the 5.0 threshold."""
    chunks = _make_chunks()
    llm = MagicMock()
    llm.complete = AsyncMock(return_value="[1.0, 2.0, 3.0]")

    rf = RelevanceFilter(llm)
    result = await rf.filter("Question", chunks, threshold=5.0)

    assert result == []
|
||||
|
|
|
|||
Loading…
Reference in New Issue