236 lines
7.8 KiB
Python
236 lines
7.8 KiB
Python
"""Phase 5 highlight endpoint tests: POST /api/v1/v2/highlights/batch and GET /api/v1/v2/highlights.
|
|
|
|
Covers:
|
|
- POST batch returns 200 with HighlightBatchResponse on valid targets
|
|
- POST batch returns 422 when request body is invalid (missing fields)
|
|
- POST batch returns 200 with status="completed" matching mock
|
|
- GET returns 200 text/html on cache hit
|
|
- GET returns 404 on cache miss
|
|
- GET returns 404 when missing required query params
|
|
|
|
Uses TestClient + isolated FastAPI app + monkeypatch for mocking.
|
|
"""
|
|
|
|
import pytest
|
|
from fastapi import FastAPI
|
|
from fastapi.testclient import TestClient
|
|
|
|
from app.models.highlight import (
|
|
ChunkHighlightTarget,
|
|
HighlightBatchResponse,
|
|
)
|
|
from app.routers import chunks
|
|
from app.services.highlight_cache import HighlightCache, compute_cache_key
|
|
|
|
|
|
@pytest.fixture
|
|
def client(tmp_path, monkeypatch):
|
|
"""Create TestClient with chunks router, isolated DB paths, mocked settings."""
|
|
prompts_path = str(tmp_path / "prompts.db")
|
|
highlights_path = str(tmp_path / "highlights.db")
|
|
|
|
# Monkeypatch get_settings to return a settings-like object
|
|
class _FakeSettings:
|
|
prompts_db_path = prompts_path
|
|
llm_api_key = "test-key"
|
|
llm_base_url = "https://example.com"
|
|
llm_model_name = "test-model"
|
|
llm_enable_thinking = False
|
|
vllm_engine = False
|
|
embedding_model = "test-emb"
|
|
embedding_base_url = "https://example.com"
|
|
embedding_api_key = "test-key"
|
|
chroma_db_path = str(tmp_path / "chroma")
|
|
document_chunk_path = str(tmp_path / "chunks")
|
|
history_db_path = str(tmp_path / "history.db")
|
|
cors_origins = ["*"]
|
|
chunk_size = 1000
|
|
chunk_overlap = 200
|
|
retrieval_n_results = 10
|
|
relevance_threshold = 7.0
|
|
llm_timeout = 60.0
|
|
|
|
from app.core.config import get_settings
|
|
get_settings.cache_clear()
|
|
|
|
monkeypatch.setattr("app.routers.chunks.get_settings", lambda: _FakeSettings())
|
|
|
|
test_app = FastAPI()
|
|
test_app.include_router(chunks.router)
|
|
|
|
yield TestClient(test_app), _FakeSettings, highlights_path
|
|
|
|
get_settings.cache_clear()
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# POST /api/v1/v2/highlights/batch
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestPostBatchHighlights:
|
|
"""Tests for POST /api/v1/v2/highlights/batch."""
|
|
|
|
def test_batch_returns_200_on_valid_targets(self, client, monkeypatch):
|
|
"""POST batch returns 200 with HighlightBatchResponse for valid targets."""
|
|
test_client, fake_settings, _ = client
|
|
|
|
mock_response = HighlightBatchResponse(
|
|
status="completed", cached_count=2, errors=[]
|
|
)
|
|
|
|
async def _mock_compute(self, targets):
|
|
return mock_response
|
|
|
|
monkeypatch.setattr(
|
|
"app.routers.chunks.ChunkHighlightService.compute_highlights_batch",
|
|
_mock_compute,
|
|
)
|
|
|
|
payload = {
|
|
"targets": [
|
|
{
|
|
"document_id": "doc1.pdf",
|
|
"chunk_index": 0,
|
|
"sub_question_text": "What is X?",
|
|
"sub_question_index": 0,
|
|
},
|
|
{
|
|
"document_id": "doc2.pdf",
|
|
"chunk_index": 1,
|
|
"sub_question_text": "What is Y?",
|
|
"sub_question_index": 1,
|
|
},
|
|
]
|
|
}
|
|
|
|
resp = test_client.post("/api/v1/v2/highlights/batch", json=payload)
|
|
assert resp.status_code == 200
|
|
data = resp.json()
|
|
assert data["status"] == "completed"
|
|
assert data["cached_count"] == 2
|
|
assert data["errors"] == []
|
|
|
|
def test_batch_returns_422_on_invalid_body(self, client):
|
|
"""POST batch returns 422 when request body is missing required fields."""
|
|
test_client, _, _ = client
|
|
|
|
# Missing targets entirely
|
|
resp = test_client.post("/api/v1/v2/highlights/batch", json={})
|
|
assert resp.status_code == 422
|
|
|
|
def test_batch_returns_422_on_invalid_target_fields(self, client):
|
|
"""POST batch returns 422 when target objects lack required fields."""
|
|
test_client, _, _ = client
|
|
|
|
payload = {
|
|
"targets": [
|
|
{
|
|
"document_id": "doc1.pdf",
|
|
# missing chunk_index, sub_question_text, sub_question_index
|
|
}
|
|
]
|
|
}
|
|
resp = test_client.post("/api/v1/v2/highlights/batch", json=payload)
|
|
assert resp.status_code == 422
|
|
|
|
def test_batch_returns_completed_with_matching_mock(self, client, monkeypatch):
|
|
"""POST batch returns status='completed' and cached_count matches mock."""
|
|
test_client, _, _ = client
|
|
|
|
mock_response = HighlightBatchResponse(
|
|
status="completed", cached_count=5, errors=[]
|
|
)
|
|
|
|
async def _mock_compute(self, targets):
|
|
return mock_response
|
|
|
|
monkeypatch.setattr(
|
|
"app.routers.chunks.ChunkHighlightService.compute_highlights_batch",
|
|
_mock_compute,
|
|
)
|
|
|
|
payload = {
|
|
"targets": [
|
|
{
|
|
"document_id": "doc.pdf",
|
|
"chunk_index": 0,
|
|
"sub_question_text": "Q1",
|
|
"sub_question_index": 0,
|
|
}
|
|
]
|
|
}
|
|
resp = test_client.post("/api/v1/v2/highlights/batch", json=payload)
|
|
assert resp.status_code == 200
|
|
data = resp.json()
|
|
assert data["status"] == "completed"
|
|
assert data["cached_count"] == 5
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# GET /api/v1/v2/highlights
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
class TestGetHighlight:
|
|
"""Tests for GET /api/v1/v2/highlights."""
|
|
|
|
def test_get_returns_200_html_on_cache_hit(self, client):
|
|
"""GET returns 200 text/html when cache key exists."""
|
|
test_client, fake_settings, _ = client
|
|
|
|
# Build the same cache the router will use
|
|
db_path = str(fake_settings.prompts_db_path).replace(
|
|
"prompts.db", "highlights.db"
|
|
)
|
|
cache = HighlightCache(db_path=db_path)
|
|
|
|
doc_id = "doc1.pdf"
|
|
chunk_idx = 3
|
|
sub_q = "What is the budget?"
|
|
cache_key = compute_cache_key(doc_id, chunk_idx, sub_q)
|
|
|
|
html_content = "<html><body>highlighted chunk</body></html>"
|
|
cache.set_highlight(
|
|
cache_key=cache_key,
|
|
document_id=doc_id,
|
|
chunk_index=chunk_idx,
|
|
sub_question=sub_q,
|
|
relevant_sentences_json='[]',
|
|
html_content=html_content,
|
|
)
|
|
|
|
resp = test_client.get(
|
|
"/api/v1/v2/highlights",
|
|
params={
|
|
"document_id": doc_id,
|
|
"chunk_index": chunk_idx,
|
|
"sub_question": sub_q,
|
|
},
|
|
)
|
|
assert resp.status_code == 200
|
|
assert resp.headers["content-type"] == "text/html; charset=utf-8"
|
|
assert "highlighted chunk" in resp.text
|
|
|
|
def test_get_returns_404_on_cache_miss(self, client):
|
|
"""GET returns 404 when document_id not in cache."""
|
|
test_client, _, _ = client
|
|
|
|
resp = test_client.get(
|
|
"/api/v1/v2/highlights",
|
|
params={
|
|
"document_id": "nonexistent.pdf",
|
|
"chunk_index": 99,
|
|
"sub_question": "unknown question",
|
|
},
|
|
)
|
|
assert resp.status_code == 404
|
|
|
|
def test_get_returns_404_on_missing_params(self, client):
|
|
"""GET returns 404 (or 422) when required query params are missing."""
|
|
test_client, _, _ = client
|
|
|
|
# Missing all params — FastAPI returns 422 for required Query params
|
|
resp = test_client.get("/api/v1/v2/highlights")
|
|
assert resp.status_code == 422
|