legco_ai_assistant/backend/app/test/test_phase5_highlight_endpo...

236 lines
7.8 KiB
Python

"""Phase 5 highlight endpoint tests: POST /api/v1/v2/highlights/batch and GET /api/v1/v2/highlights.
Covers:
- POST batch returns 200 with HighlightBatchResponse on valid targets
- POST batch returns 422 when request body is invalid (missing fields)
- POST batch returns 200 with status="completed" matching mock
- GET returns 200 text/html on cache hit
- GET returns 404 on cache miss
- GET returns 422 when required query params are missing
Uses TestClient + isolated FastAPI app + monkeypatch for mocking.
"""
import pytest
from fastapi import FastAPI
from fastapi.testclient import TestClient
from app.models.highlight import (
ChunkHighlightTarget,
HighlightBatchResponse,
)
from app.routers import chunks
from app.services.highlight_cache import HighlightCache, compute_cache_key
@pytest.fixture
def client(tmp_path, monkeypatch):
    """Yield an isolated TestClient together with its fake settings.

    A throwaway FastAPI app is assembled around the chunks router only, and
    ``app.routers.chunks.get_settings`` is patched to return a stub whose
    filesystem paths all live under pytest's ``tmp_path``.

    Yields:
        A 3-tuple ``(TestClient, _FakeSettings, highlights_path)``. The second
        item is the stub settings *class* (its values are class attributes);
        the third is the ``highlights.db`` path located under ``tmp_path``.
    """
    from app.core.config import get_settings

    prompts_db = str(tmp_path / "prompts.db")
    highlights_db = str(tmp_path / "highlights.db")

    class _FakeSettings:
        # Paths are rooted in tmp_path so each test gets its own files.
        prompts_db_path = prompts_db
        chroma_db_path = str(tmp_path / "chroma")
        document_chunk_path = str(tmp_path / "chunks")
        history_db_path = str(tmp_path / "history.db")

        llm_api_key = "test-key"
        llm_base_url = "https://example.com"
        llm_model_name = "test-model"
        llm_enable_thinking = False
        llm_timeout = 60.0
        vllm_engine = False

        embedding_model = "test-emb"
        embedding_base_url = "https://example.com"
        embedding_api_key = "test-key"

        cors_origins = ["*"]
        chunk_size = 1000
        chunk_overlap = 200
        retrieval_n_results = 10
        relevance_threshold = 7.0

    def _fake_get_settings():
        # A new stub instance per call, as the router calls get_settings().
        return _FakeSettings()

    # Drop any cached real settings before swapping in the stub getter.
    get_settings.cache_clear()
    monkeypatch.setattr("app.routers.chunks.get_settings", _fake_get_settings)

    isolated_app = FastAPI()
    isolated_app.include_router(chunks.router)

    yield TestClient(isolated_app), _FakeSettings, highlights_db

    # Teardown: clear the cache again so later tests start clean.
    get_settings.cache_clear()
# ---------------------------------------------------------------------------
# POST /api/v1/v2/highlights/batch
# ---------------------------------------------------------------------------
class TestPostBatchHighlights:
    """Exercise POST /api/v1/v2/highlights/batch through the isolated app."""

    # Route under test, shared by every case in this class.
    _URL = "/api/v1/v2/highlights/batch"

    def test_batch_returns_200_on_valid_targets(self, client, monkeypatch):
        """Two well-formed targets yield 200 and the mocked batch response."""
        http, _, _ = client
        canned = HighlightBatchResponse(
            status="completed", cached_count=2, errors=[]
        )

        async def _fake_batch(self, targets):
            return canned

        # Swap out the service method so the real batch computation never runs.
        monkeypatch.setattr(
            "app.routers.chunks.ChunkHighlightService.compute_highlights_batch",
            _fake_batch,
        )

        request_body = {
            "targets": [
                {
                    "document_id": doc,
                    "chunk_index": idx,
                    "sub_question_text": question,
                    "sub_question_index": idx,
                }
                for idx, (doc, question) in enumerate(
                    [("doc1.pdf", "What is X?"), ("doc2.pdf", "What is Y?")]
                )
            ]
        }

        response = http.post(self._URL, json=request_body)

        assert response.status_code == 200
        body = response.json()
        assert body["status"] == "completed"
        assert body["cached_count"] == 2
        assert body["errors"] == []

    def test_batch_returns_422_on_invalid_body(self, client):
        """An empty JSON object (no ``targets`` key) is rejected with 422."""
        http, _, _ = client

        response = http.post(self._URL, json={})

        assert response.status_code == 422

    def test_batch_returns_422_on_invalid_target_fields(self, client):
        """A target carrying only ``document_id`` is rejected with 422."""
        http, _, _ = client
        # chunk_index, sub_question_text and sub_question_index are omitted.
        incomplete_target = {"document_id": "doc1.pdf"}

        response = http.post(self._URL, json={"targets": [incomplete_target]})

        assert response.status_code == 422

    def test_batch_returns_completed_with_matching_mock(self, client, monkeypatch):
        """The response echoes the mock's status and cached_count."""
        http, _, _ = client
        canned = HighlightBatchResponse(
            status="completed", cached_count=5, errors=[]
        )

        async def _fake_batch(self, targets):
            return canned

        monkeypatch.setattr(
            "app.routers.chunks.ChunkHighlightService.compute_highlights_batch",
            _fake_batch,
        )

        single_target = {
            "document_id": "doc.pdf",
            "chunk_index": 0,
            "sub_question_text": "Q1",
            "sub_question_index": 0,
        }

        response = http.post(self._URL, json={"targets": [single_target]})

        assert response.status_code == 200
        body = response.json()
        assert body["status"] == "completed"
        assert body["cached_count"] == 5
# ---------------------------------------------------------------------------
# GET /api/v1/v2/highlights
# ---------------------------------------------------------------------------
class TestGetHighlight:
    """Tests for GET /api/v1/v2/highlights."""

    def test_get_returns_200_html_on_cache_hit(self, client):
        """GET returns 200 text/html when the cache key exists."""
        test_client, _, highlights_path = client

        # Seed the cache at the fixture-provided highlights.db path, which sits
        # beside prompts.db in tmp_path. Using the fixture value avoids
        # re-deriving it with a whole-string replace on the prompts path.
        # NOTE(review): assumes the router resolves its highlights DB to this
        # same sibling path -- confirm against the router's derivation.
        cache = HighlightCache(db_path=highlights_path)

        doc_id = "doc1.pdf"
        chunk_idx = 3
        sub_q = "What is the budget?"
        cache_key = compute_cache_key(doc_id, chunk_idx, sub_q)
        html_content = "<html><body>highlighted chunk</body></html>"
        cache.set_highlight(
            cache_key=cache_key,
            document_id=doc_id,
            chunk_index=chunk_idx,
            sub_question=sub_q,
            relevant_sentences_json='[]',
            html_content=html_content,
        )

        resp = test_client.get(
            "/api/v1/v2/highlights",
            params={
                "document_id": doc_id,
                "chunk_index": chunk_idx,
                "sub_question": sub_q,
            },
        )

        assert resp.status_code == 200
        assert resp.headers["content-type"] == "text/html; charset=utf-8"
        assert "highlighted chunk" in resp.text

    def test_get_returns_404_on_cache_miss(self, client):
        """GET returns 404 when nothing is cached for the requested key."""
        test_client, _, _ = client

        resp = test_client.get(
            "/api/v1/v2/highlights",
            params={
                "document_id": "nonexistent.pdf",
                "chunk_index": 99,
                "sub_question": "unknown question",
            },
        )

        assert resp.status_code == 404

    def test_get_returns_422_on_missing_params(self, client):
        """GET returns 422 when the required query params are missing."""
        test_client, _, _ = client

        # No query string at all: the request is rejected with 422 rather than
        # reaching the cache lookup that would produce a 404.
        resp = test_client.get("/api/v1/v2/highlights")

        assert resp.status_code == 422