"""Phase 9 tests: Evaluation API endpoint integration (Sub-Phase 9.3)."""
import json
from unittest.mock import AsyncMock, patch

import pytest
from fastapi import FastAPI
from fastapi.testclient import TestClient

from app.models.testing import (
    ChunkAccuracy,
    DimensionScores,
    EvaluatorConfig,
    EvaluationResult,
    FilteredResult,
    GenerateResult,
    InputInfo,
    KeyQuestionsEvalEntry,
    KeyQuestionsEvalResult,
    ResponseResult,
    RetrievalResult,
    TimingInfo,
)


@pytest.fixture(autouse=True)
def _set_api_keys(monkeypatch):
    """Set placeholder API-key environment variables for every test in this module."""
    placeholder_keys = {
        "LLM_API_KEY": "test-key",
        "DP_API_KEY": "test-dp-key",
        "DASHSCOPE_API_KEY": "test-dashscope-key",
    }
    # monkeypatch restores the previous environment when the test finishes.
    for env_name, fake_value in placeholder_keys.items():
        monkeypatch.setenv(env_name, fake_value)


@pytest.fixture
def client(tmp_path, monkeypatch):
    """Yield a ``TestClient`` for the evaluate router using per-test storage.

    The result/evaluation directories and the prompts/history database paths
    are pointed at ``tmp_path`` through environment variables, together with
    placeholder LLM settings. The settings cache is cleared before the
    databases are initialised and again after the test, so cached settings
    do not carry over between tests.

    Yields:
        TestClient: client bound to a fresh ``FastAPI`` app that mounts the
        evaluate router under ``/api/v1``.
    """
    from contextlib import closing

    results_dir = str(tmp_path / "test_results")
    evals_dir = str(tmp_path / "test_evaluations")
    prompts_path = str(tmp_path / "prompts.db")
    history_path = str(tmp_path / "history.db")

    monkeypatch.setenv("TEST_RESULTS_DIR", results_dir)
    monkeypatch.setenv("TEST_EVALUATIONS_DIR", evals_dir)
    monkeypatch.setenv("PROMPTS_DB_PATH", prompts_path)
    monkeypatch.setenv("HISTORY_DB_PATH", history_path)
    monkeypatch.setenv("LLM_API_KEY", "test-key")
    monkeypatch.setenv("LLM_BASE_URL", "https://test.example.com/v1")
    monkeypatch.setenv("LLM_MODEL_NAME", "test-model")
    monkeypatch.setenv("EMBEDDING_MODEL", "test-embedding")

    # NOTE(review): project imports are deferred until after the environment
    # is patched, presumably so nothing reads the real environment at import
    # time -- confirm against app.core.config.
    from app.core.config import get_settings
    get_settings.cache_clear()

    from app.core.sqlite_db import _get_db, init_prompts_db, init_history_db, seed_default_profiles

    # closing() guarantees each connection is released even if initialisation
    # or seeding raises, instead of leaking the handle on the error path.
    with closing(_get_db(prompts_path)) as conn:
        init_prompts_db(conn)
        seed_default_profiles(conn)

    with closing(_get_db(history_path)) as hconn:
        init_history_db(hconn)

    from app.routers.test_evaluate import router
    test_app = FastAPI()
    test_app.include_router(router, prefix="/api/v1")
    yield TestClient(test_app)

    # Teardown: drop the settings cached while the patched environment was active.
    get_settings.cache_clear()


def _make_sample_result():
    """Build a small text-input ``GenerateResult`` with id ``test-result-001``."""
    question_input = InputInfo(text="test question")
    retrieval_stage = RetrievalResult(
        per_sub_question=[],
        total_chunks_retrieved=10,
        retriever_time_ms=100,
    )
    filter_stage = FilteredResult(
        per_sub_question=[],
        total_chunks_filtered=5,
        filter_time_ms=100,
    )
    response_stage = ResponseResult(
        final_answer="answer",
        sub_question_sources=[],
        generate_time_ms=100,
    )
    # Four stages at 100 ms each, matching the 400 ms total.
    stage_timing = TimingInfo(
        decomposer_time_ms=100,
        retriever_time_ms=100,
        filter_time_ms=100,
        generator_time_ms=100,
        total_time_ms=400,
    )
    return GenerateResult(
        result_id="test-result-001",
        input_type="text",
        profile="A",
        input=question_input,
        extracted_key_questions=["q1", "q2"],
        retrieval=retrieval_stage,
        filtered=filter_stage,
        response=response_stage,
        timing=stage_timing,
    )


@pytest.fixture
def saved_result(client):
    """Store the sample result via ``TestStorageService`` and return its id.

    Requesting ``client`` makes that fixture's environment setup run before
    the settings are read here.
    """
    from app.services.test_storage_service import TestStorageService
    from app.core.config import get_settings

    sample = _make_sample_result()
    results_dir = get_settings().test_results_dir
    evaluations_dir = get_settings().test_evaluations_dir
    storage = TestStorageService(results_dir, evaluations_dir)
    storage.save_result(sample)
    return sample.result_id


class TestEvaluateEndpoint:
    """Integration tests for ``POST /api/v1/test/evaluate``."""

    def test_valid_evaluate_returns_200(self, client, saved_result):
        """A stored result id with a full evaluator config is answered with 200.

        This is a plain synchronous test: ``TestClient`` is a blocking client
        and nothing here is awaited, so no asyncio marker or plugin is needed.
        """
        # NOTE(review): no evaluator is patched in this test, so the endpoint
        # runs with the placeholder URLs/keys below. Presumably evaluator
        # failures are reported as "partial", which is why both statuses are
        # accepted -- confirm against the router and consider mocking the
        # evaluators to keep the test offline.
        payload = {
            "result_id": saved_result,
            "evaluation_config": {
                "key_questions_evaluators": [
                    {"model_name": "deepseek-v4-pro", "base_url": "https://api.deepseek.com", "api_key_env": "DP_API_KEY", "enable_thinking": True},
                    {"model_name": "qwen3-7b-max", "base_url": "https://dashscope.example.com/v1", "api_key_env": "DASHSCOPE_API_KEY", "enable_thinking": True},
                ],
                "chunk_evaluator": {"model_name": "test", "base_url": "https://test.example.com", "api_key_env": "LLM_API_KEY", "enable_thinking": True},
                "response_evaluator": {"model_name": "test", "base_url": "https://test.example.com", "api_key_env": "LLM_API_KEY", "enable_thinking": True},
            },
        }

        resp = client.post("/api/v1/test/evaluate", json=payload)
        assert resp.status_code == 200
        data = resp.json()
        assert data["status"] in ("completed", "partial")
        assert "evaluation_id" in data

    def test_missing_result_returns_404(self, client):
        """An unknown ``result_id`` is rejected with 404."""
        payload = {
            "result_id": "no-such-id",
            "evaluation_config": {
                "key_questions_evaluators": [],
                "chunk_evaluator": {"model_name": "t", "base_url": "https://x.com", "api_key_env": "LLM_API_KEY"},
                "response_evaluator": {"model_name": "t", "base_url": "https://x.com", "api_key_env": "LLM_API_KEY"},
            },
        }
        resp = client.post("/api/v1/test/evaluate", json=payload)
        assert resp.status_code == 404