"""Phase 1 tests for the Documents CRUD endpoints.

Endpoints exercised:
- GET /documents (listing, including per-document chunk counts)
- GET /documents/{id}/chunks
- DELETE /documents/{id}
- DELETE /chunks/{id}

Every test talks to a real ChromaDB rooted in pytest's ``tmp_path`` through
a FastAPI ``TestClient``; internal services are never mocked.
"""

import pytest
from fastapi.testclient import TestClient


@pytest.fixture
def client(tmp_path, monkeypatch):
    """Yield a TestClient whose ChromaDB and chunk storage live in tmp_path."""
    chunk_store = tmp_path / "chunks"
    chunk_store.mkdir()

    env_overrides = {
        "CHROMA_DB_PATH": str(tmp_path / "chroma_test"),
        "DOCUMENT_CHUNK_PATH": str(chunk_store),
    }
    for variable, value in env_overrides.items():
        monkeypatch.setenv(variable, value)

    # Imported lazily, after the environment overrides are in place.
    from app.core.config import get_settings

    # Drop any cached settings so the overrides above are the ones in effect.
    get_settings.cache_clear()

    from app.main import app

    yield TestClient(app)

    # Teardown: do not let this test's settings leak into the next one.
    get_settings.cache_clear()


def _ok_json(response):
    """Assert that *response* has status 200 and return its decoded JSON body."""
    assert response.status_code == 200
    return response.json()


def _seed_document(
    tmp_path,
    monkeypatch,
    document_id,
    filename,
    num_chunks,
    chunk_file_paths=None,
):
    """Ingest a synthetic document into the ChromaDB behind the client fixture.

    Call this only AFTER the ``client`` fixture is active, so that
    ``get_settings()`` resolves to the same tmp_path ChromaDB directory.

    Args:
        tmp_path: Unused here; accepted to keep the call signature stable.
        monkeypatch: Unused here; accepted to keep the call signature stable.
        document_id: Identifier the chunks are ingested under.
        filename: Stored in each chunk's metadata and passed as ``file_path``.
        num_chunks: Number of synthetic chunks to create.
        chunk_file_paths: Optional sequence; entry ``i`` (when present) is
            recorded as ``chunk_file_path`` in chunk ``i``'s metadata.

    Returns:
        The ``document_id`` that was passed in.
    """
    from app.core.config import get_settings
    from app.services.rag import RAGService

    known_paths = chunk_file_paths or []
    texts = []
    metadatas = []
    for index in range(num_chunks):
        texts.append(f"chunk content {index}")
        entry = {
            "filename": filename,
            "upload_date": "2026-04-23",
            "content_summary": f"summary {index}",
            "chunk_index": index,
        }
        # Only chunks that have a matching path entry get the extra key.
        if index < len(known_paths):
            entry["chunk_file_path"] = known_paths[index]
        metadatas.append(entry)

    service = RAGService(settings=get_settings())
    service.ingest_document(
        file_path=filename,
        chunks=texts,
        metadata_list=metadatas,
        document_id=document_id,
    )
    return document_id


def test_list_documents_empty(client):
    """An empty store lists no documents and reports zero totals."""
    payload = _ok_json(client.get("/api/v1/documents"))

    assert payload["documents"] == []
    assert payload["total_documents"] == 0
    assert payload["total_chunks"] == 0


def test_list_documents_with_data(client, tmp_path, monkeypatch):
    """Seeded documents are listed grouped by id with their chunk counts."""
    _seed_document(tmp_path, monkeypatch, "abc-123", "report.pdf", 3)
    _seed_document(tmp_path, monkeypatch, "def-456", "notes.txt", 1)

    payload = _ok_json(client.get("/api/v1/documents"))

    assert payload["total_documents"] == 2
    assert payload["total_chunks"] == 4
    assert len(payload["documents"]) == 2

    documents = {doc["document_id"]: doc for doc in payload["documents"]}

    report = documents["abc-123"]
    assert report["filename"] == "report.pdf"
    assert report["chunk_count"] == 3

    notes = documents["def-456"]
    assert notes["filename"] == "notes.txt"
    assert notes["chunk_count"] == 1


def test_list_chunks_for_document(client, tmp_path, monkeypatch):
    """Every chunk of the requested document is returned."""
    _seed_document(tmp_path, monkeypatch, "abc-123", "report.pdf", 2)

    listed = _ok_json(client.get("/api/v1/documents/abc-123/chunks"))

    assert len(listed) == 2
    first, second = listed
    assert first["chunk_id"] == "abc-123_0"
    assert first["chunk_index"] == 0
    assert first["content_summary"] == "summary 0"
    assert second["chunk_index"] == 1


def test_list_chunks_document_not_found(client):
    """An unknown document id yields an empty chunk list, not an error."""
    listed = _ok_json(client.get("/api/v1/documents/nonexistent-id/chunks"))

    assert listed == []


def test_delete_document_success(client, tmp_path, monkeypatch):
    """Deleting a document removes all of its chunks and confirms it."""
    _seed_document(tmp_path, monkeypatch, "abc-123", "report.pdf", 3)

    result = _ok_json(client.delete("/api/v1/documents/abc-123"))

    assert result["deleted"] is True
    assert "3 chunks removed" in result["message"]

    # The listing must no longer contain the document.
    listing = client.get("/api/v1/documents")
    assert listing.json()["total_documents"] == 0


def test_delete_document_not_found(client):
    """Deleting an unknown document responds with 404."""
    outcome = client.delete("/api/v1/documents/nonexistent-id")

    assert outcome.status_code == 404
    assert "not found" in outcome.json()["detail"].lower()


def test_delete_chunk_success(client, tmp_path, monkeypatch):
    """Deleting one chunk confirms it and leaves the sibling chunk intact."""
    _seed_document(tmp_path, monkeypatch, "abc-123", "report.pdf", 2)

    result = _ok_json(client.delete("/api/v1/chunks/abc-123_0"))

    assert result["deleted"] is True
    assert "abc-123_0" in result["message"]

    # Only the other chunk of the document should be left.
    remaining = client.get("/api/v1/documents/abc-123/chunks").json()
    assert len(remaining) == 1
    assert remaining[0]["chunk_id"] == "abc-123_1"


def test_delete_chunk_not_found(client):
    """Deleting an unknown chunk responds with 404."""
    outcome = client.delete("/api/v1/chunks/nonexistent-chunk")

    assert outcome.status_code == 404
    assert "not found" in outcome.json()["detail"].lower()