167 lines
5.4 KiB
Python
167 lines
5.4 KiB
Python
"""Phase 1 tests: Documents CRUD endpoints.
|
|
|
|
Covers:
|
|
- GET /documents listing with chunk counts
|
|
- GET /documents/{id}/chunks
|
|
- DELETE /documents/{id}
|
|
- DELETE /chunks/{id}
|
|
|
|
Uses real ChromaDB via tmp_path + TestClient — no mocks on internal services.
|
|
"""
|
|
import pytest
|
|
from fastapi.testclient import TestClient
|
|
|
|
|
|
@pytest.fixture
def client(tmp_path, monkeypatch):
    """Yield a TestClient whose storage paths point inside ``tmp_path``.

    The fixture creates ``tmp_path / "chunks"``, exports ``CHROMA_DB_PATH``
    and ``DOCUMENT_CHUNK_PATH`` through ``monkeypatch``, clears the
    ``get_settings`` cache, and only then imports the application.  The
    cache is cleared again once the test has finished.
    """
    chunk_store = tmp_path / "chunks"
    chunk_store.mkdir()

    env_overrides = {
        "CHROMA_DB_PATH": str(tmp_path / "chroma_test"),
        "DOCUMENT_CHUNK_PATH": str(chunk_store),
    }
    for name, value in env_overrides.items():
        monkeypatch.setenv(name, value)

    # Imported inside the fixture, after the environment overrides are set.
    # NOTE(review): presumably so settings are resolved against the
    # temporary paths — confirm against app.core.config.
    from app.core.config import get_settings

    get_settings.cache_clear()

    from app.main import app

    test_client = TestClient(app)
    yield test_client

    # Teardown: drop the cached settings again after the test.
    get_settings.cache_clear()
|
|
|
|
|
|
def _seed_document(tmp_path, monkeypatch, document_id, filename, num_chunks, chunk_file_paths=None):
    """Ingest a synthetic document through ``RAGService`` and return its id.

    Call this only after the ``client`` fixture is active, so that
    ``get_settings()`` resolves to the same temporary ChromaDB directory.

    Args:
        tmp_path: Accepted for call-site symmetry; not used in the body.
        monkeypatch: Accepted for call-site symmetry; not used in the body.
        document_id: Identifier passed to ``ingest_document`` and returned.
        filename: Used both as ``file_path`` and as the ``filename`` metadata.
        num_chunks: Number of chunks (and metadata entries) to generate.
        chunk_file_paths: Optional sequence; entry ``i`` (when present) is
            stored as ``chunk_file_path`` in the metadata of chunk ``i``.

    Returns:
        The ``document_id`` that was passed in.
    """
    from app.core.config import get_settings
    from app.services.rag import RAGService

    service = RAGService(settings=get_settings())
    known_paths = chunk_file_paths or ()

    def _metadata_for(i):
        # One metadata record per chunk; the path key is added only when a
        # path was supplied for this index.
        entry = {
            "filename": filename,
            "upload_date": "2026-04-23",
            "content_summary": f"summary {i}",
            "chunk_index": i,
        }
        if i < len(known_paths):
            entry["chunk_file_path"] = known_paths[i]
        return entry

    service.ingest_document(
        file_path=filename,
        chunks=[f"chunk content {i}" for i in range(num_chunks)],
        metadata_list=[_metadata_for(i) for i in range(num_chunks)],
        document_id=document_id,
    )
    return document_id
|
|
|
|
|
|
def test_list_documents_empty(client):
    """An untouched store lists no documents and reports zero totals."""
    reply = client.get("/api/v1/documents")
    assert reply.status_code == 200

    body = reply.json()
    assert body["documents"] == []
    for counter in ("total_documents", "total_chunks"):
        assert body[counter] == 0
|
|
|
|
|
|
def test_list_documents_with_data(client, tmp_path, monkeypatch):
    """The listing groups chunks per document and reports per-document counts."""
    # (document_id, filename, chunk count) for every seeded document.
    seeded = (
        ("abc-123", "report.pdf", 3),
        ("def-456", "notes.txt", 1),
    )
    for doc_id, name, count in seeded:
        _seed_document(tmp_path, monkeypatch, doc_id, name, count)

    reply = client.get("/api/v1/documents")
    assert reply.status_code == 200

    body = reply.json()
    assert body["total_documents"] == 2
    assert body["total_chunks"] == 4
    assert len(body["documents"]) == 2

    # Index by id so the assertions do not depend on listing order.
    listed = {entry["document_id"]: entry for entry in body["documents"]}
    for doc_id, name, count in seeded:
        assert listed[doc_id]["filename"] == name
        assert listed[doc_id]["chunk_count"] == count
|
|
|
|
|
|
def test_list_chunks_for_document(client, tmp_path, monkeypatch):
    """The chunks endpoint returns every chunk of the requested document."""
    _seed_document(tmp_path, monkeypatch, "abc-123", "report.pdf", 2)

    reply = client.get("/api/v1/documents/abc-123/chunks")
    assert reply.status_code == 200

    chunks = reply.json()
    assert len(chunks) == 2

    # NOTE(review): these checks rely on chunks coming back in index order.
    first = chunks[0]
    assert first["chunk_id"] == "abc-123_0"
    assert first["chunk_index"] == 0
    assert first["content_summary"] == "summary 0"

    second = chunks[1]
    assert second["chunk_index"] == 1
|
|
|
|
|
|
def test_list_chunks_document_not_found(client):
    """An unknown document id yields a 200 response with an empty list."""
    reply = client.get("/api/v1/documents/nonexistent-id/chunks")

    assert reply.status_code == 200
    assert reply.json() == []
|
|
|
|
|
|
def test_delete_document_success(client, tmp_path, monkeypatch):
    """Should delete all chunks for a document and return confirmation.

    Seeds one three-chunk document, deletes it through the API, checks the
    confirmation payload, then lists documents again to confirm nothing is
    left.
    """
    _seed_document(tmp_path, monkeypatch, "abc-123", "report.pdf", 3)

    response = client.delete("/api/v1/documents/abc-123")

    assert response.status_code == 200
    data = response.json()
    assert data["deleted"] is True
    assert "3 chunks removed" in data["message"]

    # Verify actually deleted.  The status is asserted first so that a failed
    # follow-up request is reported as such rather than as a KeyError on the
    # JSON payload.
    response = client.get("/api/v1/documents")
    assert response.status_code == 200
    assert response.json()["total_documents"] == 0
|
|
|
|
|
|
def test_delete_document_not_found(client):
    """Deleting an unknown document id answers 404 with a "not found" detail."""
    reply = client.delete("/api/v1/documents/nonexistent-id")
    assert reply.status_code == 404

    detail = reply.json()["detail"]
    assert "not found" in detail.lower()
|
|
|
|
|
|
def test_delete_chunk_success(client, tmp_path, monkeypatch):
    """Should delete a single chunk and return confirmation.

    Seeds a two-chunk document, deletes chunk ``abc-123_0`` through the API,
    checks the confirmation payload, then lists the document's chunks to
    confirm only ``abc-123_1`` is left.
    """
    _seed_document(tmp_path, monkeypatch, "abc-123", "report.pdf", 2)

    response = client.delete("/api/v1/chunks/abc-123_0")

    assert response.status_code == 200
    data = response.json()
    assert data["deleted"] is True
    assert "abc-123_0" in data["message"]

    # Verify chunk gone but other chunk remains.  The status is asserted
    # first so that a failed follow-up request is reported as such rather
    # than as a length or key mismatch on an error payload.
    response = client.get("/api/v1/documents/abc-123/chunks")
    assert response.status_code == 200
    chunks = response.json()
    assert len(chunks) == 1
    assert chunks[0]["chunk_id"] == "abc-123_1"
|
|
|
|
|
|
def test_delete_chunk_not_found(client):
    """Deleting an unknown chunk id answers 404 with a "not found" detail."""
    reply = client.delete("/api/v1/chunks/nonexistent-chunk")
    assert reply.status_code == 404

    detail = reply.json()["detail"]
    assert "not found" in detail.lower()
|