# legco_ai_assistant/backend/app/test/test_phase1_documents_route...
#
# 167 lines
# 5.4 KiB
# Python

"""Phase 1 tests: Documents CRUD endpoints.
Covers:
- GET /documents listing with chunk counts
- GET /documents/{id}/chunks
- DELETE /documents/{id}
- DELETE /chunks/{id}
Uses real ChromaDB via tmp_path + TestClient — no mocks on internal services.
"""
import pytest
from fastapi.testclient import TestClient
@pytest.fixture
def client(tmp_path, monkeypatch):
    """Yield a TestClient for the app with storage paths redirected into tmp_path.

    Two environment variables are overridden for the duration of the test:
    ``CHROMA_DB_PATH`` (a ``chroma_test`` directory under ``tmp_path``) and
    ``DOCUMENT_CHUNK_PATH`` (a ``chunks`` directory under ``tmp_path``, created
    up front). The cached settings are cleared before the app is imported and
    again after the test finishes.
    """
    chunk_store = tmp_path / "chunks"
    chunk_store.mkdir()

    env_overrides = {
        "CHROMA_DB_PATH": str(tmp_path / "chroma_test"),
        "DOCUMENT_CHUNK_PATH": str(chunk_store),
    }
    for variable, value in env_overrides.items():
        monkeypatch.setenv(variable, value)

    # Imported only after the environment is patched; the cache is dropped so
    # the next get_settings() call is re-evaluated rather than served stale.
    from app.core.config import get_settings

    get_settings.cache_clear()

    from app.main import app

    yield TestClient(app)

    # Teardown: do not let this test's settings linger in the cache.
    get_settings.cache_clear()
def _seed_document(tmp_path, monkeypatch, document_id, filename, num_chunks, chunk_file_paths=None):
    """Ingest a synthetic document via RAGService and return its document_id.

    Generates ``num_chunks`` chunks with text ``"chunk content <i>"`` and one
    metadata dict per chunk (filename, fixed upload date, ``"summary <i>"``,
    and the chunk index). When ``chunk_file_paths`` supplies an entry for a
    given index, it is stored under ``"chunk_file_path"`` for that chunk.

    Call this after the ``client`` fixture has run so that ``get_settings()``
    resolves against the environment overrides that fixture installs.

    ``tmp_path`` and ``monkeypatch`` are accepted but not used in this helper.
    """
    from app.core.config import get_settings
    from app.services.rag import RAGService

    service = RAGService(settings=get_settings())

    known_paths = chunk_file_paths or []
    chunk_texts = []
    chunk_metadata = []
    for index in range(num_chunks):
        chunk_texts.append(f"chunk content {index}")
        entry = {
            "filename": filename,
            "upload_date": "2026-04-23",
            "content_summary": f"summary {index}",
            "chunk_index": index,
        }
        # Attach a file path only for chunks that have one provided.
        if index < len(known_paths):
            entry["chunk_file_path"] = known_paths[index]
        chunk_metadata.append(entry)

    service.ingest_document(
        file_path=filename,
        chunks=chunk_texts,
        metadata_list=chunk_metadata,
        document_id=document_id,
    )
    return document_id
def test_list_documents_empty(client):
    """GET /documents with nothing seeded returns an empty list and zero totals."""
    resp = client.get("/api/v1/documents")
    assert resp.status_code == 200

    payload = resp.json()
    assert payload["documents"] == []
    for counter in ("total_documents", "total_chunks"):
        assert payload[counter] == 0
def test_list_documents_with_data(client, tmp_path, monkeypatch):
    """GET /documents groups seeded chunks per document and reports counts."""
    # document_id -> (filename, number of chunks seeded)
    seeded = {
        "abc-123": ("report.pdf", 3),
        "def-456": ("notes.txt", 1),
    }
    for doc_id, (name, chunk_total) in seeded.items():
        _seed_document(tmp_path, monkeypatch, doc_id, name, chunk_total)

    resp = client.get("/api/v1/documents")
    assert resp.status_code == 200

    payload = resp.json()
    assert payload["total_documents"] == 2
    assert payload["total_chunks"] == 4
    assert len(payload["documents"]) == 2

    # Index the listing by id so the checks do not depend on response order.
    listed = {doc["document_id"]: doc for doc in payload["documents"]}
    for doc_id, (name, chunk_total) in seeded.items():
        assert listed[doc_id]["filename"] == name
        assert listed[doc_id]["chunk_count"] == chunk_total
def test_list_chunks_for_document(client, tmp_path, monkeypatch):
    """GET /documents/{id}/chunks returns every chunk seeded for that document."""
    _seed_document(tmp_path, monkeypatch, "abc-123", "report.pdf", 2)

    resp = client.get("/api/v1/documents/abc-123/chunks")
    assert resp.status_code == 200

    chunk_list = resp.json()
    assert len(chunk_list) == 2

    first = chunk_list[0]
    assert first["chunk_id"] == "abc-123_0"
    assert first["chunk_index"] == 0
    assert first["content_summary"] == "summary 0"

    second = chunk_list[1]
    assert second["chunk_index"] == 1
def test_list_chunks_document_not_found(client):
    """GET chunks for an unknown document id answers 200 with an empty list."""
    resp = client.get("/api/v1/documents/nonexistent-id/chunks")
    assert resp.status_code == 200
    assert resp.json() == []
def test_delete_document_success(client, tmp_path, monkeypatch):
    """DELETE /documents/{id} confirms removal and the document is no longer listed."""
    _seed_document(tmp_path, monkeypatch, "abc-123", "report.pdf", 3)

    delete_resp = client.delete("/api/v1/documents/abc-123")
    assert delete_resp.status_code == 200

    confirmation = delete_resp.json()
    assert confirmation["deleted"] is True
    assert "3 chunks removed" in confirmation["message"]

    # Follow-up listing must show that nothing is left.
    listing = client.get("/api/v1/documents").json()
    assert listing["total_documents"] == 0
def test_delete_document_not_found(client):
    """DELETE on an unknown document id answers 404 with a 'not found' detail."""
    resp = client.delete("/api/v1/documents/nonexistent-id")
    assert resp.status_code == 404

    detail = resp.json()["detail"]
    assert "not found" in detail.lower()
def test_delete_chunk_success(client, tmp_path, monkeypatch):
    """DELETE /chunks/{id} removes only the targeted chunk of a document."""
    _seed_document(tmp_path, monkeypatch, "abc-123", "report.pdf", 2)

    delete_resp = client.delete("/api/v1/chunks/abc-123_0")
    assert delete_resp.status_code == 200

    confirmation = delete_resp.json()
    assert confirmation["deleted"] is True
    assert "abc-123_0" in confirmation["message"]

    # The sibling chunk must survive the deletion.
    remaining = client.get("/api/v1/documents/abc-123/chunks").json()
    assert len(remaining) == 1
    assert remaining[0]["chunk_id"] == "abc-123_1"
def test_delete_chunk_not_found(client):
    """DELETE on an unknown chunk id answers 404 with a 'not found' detail."""
    resp = client.delete("/api/v1/chunks/nonexistent-chunk")
    assert resp.status_code == 404

    detail = resp.json()["detail"]
    assert "not found" in detail.lower()