"""Phase 9 tests: Evaluation API endpoint integration (Sub-Phase 9.3)."""

import json
from contextlib import closing
from unittest.mock import AsyncMock, patch

import pytest
from fastapi import FastAPI
from fastapi.testclient import TestClient

from app.models.testing import (
    ChunkAccuracy,
    DimensionScores,
    EvaluatorConfig,
    EvaluationResult,
    FilteredResult,
    GenerateResult,
    InputInfo,
    KeyQuestionsEvalEntry,
    KeyQuestionsEvalResult,
    ResponseResult,
    RetrievalResult,
    TimingInfo,
)


@pytest.fixture(autouse=True)
def _set_api_keys(monkeypatch):
    """Set placeholder API-key environment variables for every test here."""
    monkeypatch.setenv("LLM_API_KEY", "test-key")
    monkeypatch.setenv("DP_API_KEY", "test-dp-key")
    monkeypatch.setenv("DASHSCOPE_API_KEY", "test-dashscope-key")


@pytest.fixture
def client(tmp_path, monkeypatch):
    """Yield a ``TestClient`` for an app that mounts only the evaluate router.

    Storage directories and both SQLite database paths are pointed at
    ``tmp_path`` through environment variables, the settings cache is cleared
    so those values are picked up, and both databases are initialised before
    the router is imported.
    """
    prompts_path = str(tmp_path / "prompts.db")
    history_path = str(tmp_path / "history.db")

    env_overrides = {
        "TEST_RESULTS_DIR": str(tmp_path / "test_results"),
        "TEST_EVALUATIONS_DIR": str(tmp_path / "test_evaluations"),
        "PROMPTS_DB_PATH": prompts_path,
        "HISTORY_DB_PATH": history_path,
        "LLM_API_KEY": "test-key",
        "LLM_BASE_URL": "https://test.example.com/v1",
        "LLM_MODEL_NAME": "test-model",
        "EMBEDDING_MODEL": "test-embedding",
    }
    for name, value in env_overrides.items():
        monkeypatch.setenv(name, value)

    # Imported only after the environment is patched, as in the original flow.
    from app.core.config import get_settings

    get_settings.cache_clear()
    try:
        from app.core.sqlite_db import (
            _get_db,
            init_prompts_db,
            init_history_db,
            seed_default_profiles,
        )

        # closing() guarantees the connection is released even when one of
        # the initialisation calls raises.
        with closing(_get_db(prompts_path)) as conn:
            init_prompts_db(conn)
            seed_default_profiles(conn)

        with closing(_get_db(history_path)) as hconn:
            init_history_db(hconn)

        from app.routers.test_evaluate import router

        test_app = FastAPI()
        test_app.include_router(router, prefix="/api/v1")
        yield TestClient(test_app)
    finally:
        # Always drop cached settings so the temporary values cannot leak
        # into later tests, including when setup fails before the yield.
        get_settings.cache_clear()


def _make_sample_result():
    """Build a minimal ``GenerateResult`` with the id ``test-result-001``."""
    return GenerateResult(
        result_id="test-result-001",
        input_type="text",
        profile="A",
        input=InputInfo(text="test question"),
        extracted_key_questions=["q1", "q2"],
        retrieval=RetrievalResult(
            per_sub_question=[],
            total_chunks_retrieved=10,
            retriever_time_ms=100,
        ),
        filtered=FilteredResult(
            per_sub_question=[],
            total_chunks_filtered=5,
            filter_time_ms=100,
        ),
        response=ResponseResult(
            final_answer="answer",
            sub_question_sources=[],
            generate_time_ms=100,
        ),
        timing=TimingInfo(
            decomposer_time_ms=100,
            retriever_time_ms=100,
            filter_time_ms=100,
            generator_time_ms=100,
            total_time_ms=400,
        ),
    )


@pytest.fixture
def saved_result(client):
    """Persist the sample result via ``TestStorageService``; return its id.

    Depends on ``client`` so the temporary storage settings are in place
    before the service is constructed.
    """
    from app.services.test_storage_service import TestStorageService
    from app.core.config import get_settings

    settings = get_settings()
    result = _make_sample_result()
    svc = TestStorageService(
        settings.test_results_dir,
        settings.test_evaluations_dir,
    )
    svc.save_result(result)
    return result.result_id


class TestEvaluateEndpoint:
    """Integration tests for ``POST /api/v1/test/evaluate``."""

    def test_valid_evaluate_returns_200(self, client, saved_result):
        """A stored result plus a full evaluator config responds with 200.

        The test is synchronous because ``TestClient`` is a blocking client
        and nothing here is awaited.
        """
        # NOTE(review): the evaluator back ends are not patched in this test,
        # so the request runs through the real evaluation code path.
        # Presumably that is why both "completed" and "partial" are accepted
        # below -- confirm, and patch the evaluators if a deterministic
        # status is wanted.
        payload = {
            "result_id": saved_result,
            "evaluation_config": {
                "key_questions_evaluators": [
                    {
                        "model_name": "deepseek-v4-pro",
                        "base_url": "https://api.deepseek.com",
                        "api_key_env": "DP_API_KEY",
                        "enable_thinking": True,
                    },
                    {
                        "model_name": "qwen3-7b-max",
                        "base_url": "https://dashscope.example.com/v1",
                        "api_key_env": "DASHSCOPE_API_KEY",
                        "enable_thinking": True,
                    },
                ],
                "chunk_evaluator": {
                    "model_name": "test",
                    "base_url": "https://test.example.com",
                    "api_key_env": "LLM_API_KEY",
                    "enable_thinking": True,
                },
                "response_evaluator": {
                    "model_name": "test",
                    "base_url": "https://test.example.com",
                    "api_key_env": "LLM_API_KEY",
                    "enable_thinking": True,
                },
            },
        }

        resp = client.post("/api/v1/test/evaluate", json=payload)

        assert resp.status_code == 200
        data = resp.json()
        assert data["status"] in ("completed", "partial")
        assert "evaluation_id" in data

    def test_missing_result_returns_404(self, client):
        """An unknown ``result_id`` responds with 404."""
        payload = {
            "result_id": "no-such-id",
            "evaluation_config": {
                "key_questions_evaluators": [],
                "chunk_evaluator": {
                    "model_name": "t",
                    "base_url": "https://x.com",
                    "api_key_env": "LLM_API_KEY",
                },
                "response_evaluator": {
                    "model_name": "t",
                    "base_url": "https://x.com",
                    "api_key_env": "LLM_API_KEY",
                },
            },
        }

        resp = client.post("/api/v1/test/evaluate", json=payload)

        assert resp.status_code == 404