legco_ai_assistant/backend/app/test/test_phase5_highlight_endpo...

"""Phase 5 highlight endpoint tests: POST /api/v1/v2/highlights/batch and GET /api/v1/v2/highlights.

Covers:
- POST batch returns 200 with HighlightBatchResponse on valid targets
- POST batch returns 422 when request body is invalid (missing fields)
- POST batch returns 200 with status="completed" matching mock
- GET returns 200 text/html on cache hit
- GET returns 404 on cache miss
- GET returns 422 when required query params are missing

Uses TestClient + isolated FastAPI app + monkeypatch for mocking.
"""

import pytest
from fastapi import FastAPI
from fastapi.testclient import TestClient

from app.models.highlight import (
    ChunkHighlightTarget,
    HighlightBatchResponse,
)
from app.routers import chunks
from app.services.highlight_cache import HighlightCache, compute_cache_key


@pytest.fixture
def client(tmp_path, monkeypatch):
    """Provide a TestClient bound to a minimal app that mounts only the chunks router.

    ``app.routers.chunks.get_settings`` is replaced with a stub returning a
    settings-like object whose filesystem paths all live under ``tmp_path``.
    ``get_settings.cache_clear()`` is called both before the test and again
    on teardown.

    Yields:
        A 3-tuple ``(test_client, settings_cls, highlights_db)``: the
        ``TestClient``, the stub settings class itself (not an instance),
        and the string path ``<tmp_path>/highlights.db``.
    """
    from app.core.config import get_settings

    prompts_db = str(tmp_path / "prompts.db")
    highlights_db = str(tmp_path / "highlights.db")

    class _FakeSettings:
        # Storage locations, all isolated per test under tmp_path.
        prompts_db_path = prompts_db
        history_db_path = str(tmp_path / "history.db")
        chroma_db_path = str(tmp_path / "chroma")
        document_chunk_path = str(tmp_path / "chunks")

        # LLM connection placeholders.
        llm_api_key = "test-key"
        llm_base_url = "https://example.com"
        llm_model_name = "test-model"
        llm_enable_thinking = False
        llm_timeout = 60.0
        vllm_engine = False

        # Embedding connection placeholders.
        embedding_model = "test-emb"
        embedding_base_url = "https://example.com"
        embedding_api_key = "test-key"

        # Chunking / retrieval knobs.
        chunk_size = 1000
        chunk_overlap = 200
        retrieval_n_results = 10
        relevance_threshold = 7.0

        cors_origins = ["*"]

    def _stub_get_settings():
        # A fresh instance per call, just like the original lambda.
        return _FakeSettings()

    get_settings.cache_clear()
    monkeypatch.setattr("app.routers.chunks.get_settings", _stub_get_settings)

    api = FastAPI()
    api.include_router(chunks.router)
    http = TestClient(api)

    yield http, _FakeSettings, highlights_db

    # Teardown: clear the settings cache again so later tests start clean.
    get_settings.cache_clear()


# ---------------------------------------------------------------------------
# POST /api/v1/v2/highlights/batch
# ---------------------------------------------------------------------------


class TestPostBatchHighlights:
    """Exercise POST /api/v1/v2/highlights/batch through the isolated client."""

    _URL = "/api/v1/v2/highlights/batch"
    _COMPUTE_ATTR = (
        "app.routers.chunks.ChunkHighlightService.compute_highlights_batch"
    )

    @staticmethod
    def _target(document_id, chunk_index, sub_question_text, sub_question_index):
        """Build one fully-populated highlight target for the request body."""
        return {
            "document_id": document_id,
            "chunk_index": chunk_index,
            "sub_question_text": sub_question_text,
            "sub_question_index": sub_question_index,
        }

    @classmethod
    def _stub_compute(cls, monkeypatch, cached_count):
        """Patch the batch computation to return a canned 'completed' response."""
        canned = HighlightBatchResponse(
            status="completed", cached_count=cached_count, errors=[]
        )

        async def _fake_compute(self, targets):
            return canned

        monkeypatch.setattr(cls._COMPUTE_ATTR, _fake_compute)

    def test_batch_returns_200_on_valid_targets(self, client, monkeypatch):
        """Two well-formed targets yield 200 and the stubbed response body."""
        http, _, _ = client
        self._stub_compute(monkeypatch, cached_count=2)

        body = {
            "targets": [
                self._target("doc1.pdf", 0, "What is X?", 0),
                self._target("doc2.pdf", 1, "What is Y?", 1),
            ]
        }

        reply = http.post(self._URL, json=body)
        assert reply.status_code == 200

        result = reply.json()
        assert result["status"] == "completed"
        assert result["cached_count"] == 2
        assert result["errors"] == []

    def test_batch_returns_422_on_invalid_body(self, client):
        """An empty JSON object (no ``targets`` key) is rejected with 422."""
        http, _, _ = client

        reply = http.post(self._URL, json={})
        assert reply.status_code == 422

    def test_batch_returns_422_on_invalid_target_fields(self, client):
        """A target carrying only ``document_id`` is rejected with 422."""
        http, _, _ = client

        # chunk_index, sub_question_text and sub_question_index are omitted.
        incomplete_target = {"document_id": "doc1.pdf"}

        reply = http.post(self._URL, json={"targets": [incomplete_target]})
        assert reply.status_code == 422

    def test_batch_returns_completed_with_matching_mock(self, client, monkeypatch):
        """The response echoes the stub's status and cached_count unchanged."""
        http, _, _ = client
        self._stub_compute(monkeypatch, cached_count=5)

        body = {"targets": [self._target("doc.pdf", 0, "Q1", 0)]}

        reply = http.post(self._URL, json=body)
        assert reply.status_code == 200

        result = reply.json()
        assert result["status"] == "completed"
        assert result["cached_count"] == 5


# ---------------------------------------------------------------------------
# GET /api/v1/v2/highlights
# ---------------------------------------------------------------------------


class TestGetHighlight:
    """Tests for GET /api/v1/v2/highlights."""

    _URL = "/api/v1/v2/highlights"

    def test_get_returns_200_html_on_cache_hit(self, client):
        """GET returns 200 text/html when the requested entry is in the cache."""
        test_client, fake_settings, _ = client

        # The highlights DB is addressed as a sibling of prompts.db.
        # NOTE(review): this presumably mirrors how the router derives its
        # cache path — confirm against app.routers.chunks.
        db_path = str(fake_settings.prompts_db_path).replace(
            "prompts.db", "highlights.db"
        )
        cache = HighlightCache(db_path=db_path)

        doc_id = "doc1.pdf"
        chunk_idx = 3
        sub_q = "What is the budget?"

        # Seed the cache under the key computed from the same three values
        # that are sent as query params below.
        cache.set_highlight(
            cache_key=compute_cache_key(doc_id, chunk_idx, sub_q),
            document_id=doc_id,
            chunk_index=chunk_idx,
            sub_question=sub_q,
            relevant_sentences_json="[]",
            html_content="<html><body>highlighted chunk</body></html>",
        )

        resp = test_client.get(
            self._URL,
            params={
                "document_id": doc_id,
                "chunk_index": chunk_idx,
                "sub_question": sub_q,
            },
        )
        assert resp.status_code == 200
        assert resp.headers["content-type"] == "text/html; charset=utf-8"
        assert "highlighted chunk" in resp.text

    def test_get_returns_404_on_cache_miss(self, client):
        """GET returns 404 when nothing was cached for the requested params."""
        test_client, _, _ = client

        resp = test_client.get(
            self._URL,
            params={
                "document_id": "nonexistent.pdf",
                "chunk_index": 99,
                "sub_question": "unknown question",
            },
        )
        assert resp.status_code == 404

    def test_get_returns_404_on_missing_params(self, client):
        """GET returns 422 when the required query params are missing.

        The method name still says 404 so existing test IDs keep working;
        the asserted status for a request with no query params is 422.
        """
        test_client, _, _ = client

        # No query string at all: the request is rejected before any cache
        # lookup, so the status is 422 rather than 404.
        resp = test_client.get(self._URL)
        assert resp.status_code == 422