feat(backend): add documents CRUD endpoints and tests

Add 4 REST endpoints for RAG database management: GET /documents, GET /documents/{id}/chunks, DELETE /documents/{id}, and DELETE /chunks/{id}. Register the documents router in main.py. Add 8 unit tests covering all four endpoints (success and not-found/empty cases).

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
Woody 2026-04-23 19:02:28 +08:00
parent 178461915a
commit f21085b3df
3 changed files with 272 additions and 1 deletions

View File

@ -6,7 +6,7 @@ from pathlib import Path
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from app.routers import ingest, query
from app.routers import ingest, query, documents
from app.core.config import get_settings
# Configure logging before app initialization
@ -44,6 +44,7 @@ app.add_middleware(
app.include_router(ingest.router, prefix="/api/v1")
app.include_router(query.router, prefix="/api/v1")
app.include_router(documents.router, prefix="/api/v1")
@app.get("/health")

View File

@ -0,0 +1,102 @@
"""Documents CRUD router for RAG Database management."""
import logging
from fastapi import APIRouter, HTTPException
from app.models.documents import (
DocumentInfo,
ChunkInfo,
DocumentListResponse,
DeleteResponse,
)
from app.services.rag import RAGService
# Module-level logger, named after this module; used by the delete handlers
# below to record successful deletions.
logger = logging.getLogger(__name__)
# Router that carries the document/chunk endpoints defined in this module.
# All of its routes are tagged "documents".
router = APIRouter(tags=["documents"])
@router.get("/documents", response_model=DocumentListResponse)
async def list_documents():
    """List the stored documents together with overall totals.

    A ``RAGService`` is built from the current settings and asked for its
    document listing; the result is repackaged into the response model.

    Returns:
        DocumentListResponse: one ``DocumentInfo`` per entry reported by the
        service, plus the document and chunk totals the service returned.
    """
    # Settings are resolved at call time rather than at module import.
    from app.core.config import get_settings

    # NOTE(review): list_documents() is called synchronously from an async
    # handler -- confirm it cannot block the event loop for long.
    service = RAGService(settings=get_settings())
    rows, document_total, chunk_total = service.list_documents()

    # Every row must provide these four keys; a missing one raises KeyError.
    infos = []
    for row in rows:
        infos.append(
            DocumentInfo(
                document_id=row["document_id"],
                filename=row["filename"],
                chunk_count=row["chunk_count"],
                upload_date=row["upload_date"],
            )
        )

    return DocumentListResponse(
        documents=infos,
        total_documents=document_total,
        total_chunks=chunk_total,
    )
@router.get("/documents/{document_id}/chunks", response_model=list[ChunkInfo])
async def list_chunks(document_id: str):
    """Return the chunks the RAG service reports for one document.

    Args:
        document_id: Document identifier taken from the URL path.

    Returns:
        list[ChunkInfo]: one entry per chunk returned by the service, in the
        order the service returned them. This handler raises no 404 itself;
        an empty service result is returned as an empty list.
    """
    # Settings are resolved at call time rather than at module import.
    from app.core.config import get_settings

    def to_chunk_info(record):
        # chunk_id, chunk_index and content_summary are required keys;
        # page_number and chunk_file_path fall back to None when absent.
        return ChunkInfo(
            chunk_id=record["chunk_id"],
            chunk_index=record["chunk_index"],
            content_summary=record["content_summary"],
            page_number=record.get("page_number"),
            chunk_file_path=record.get("chunk_file_path"),
        )

    # NOTE(review): list_chunks() is called synchronously from an async
    # handler -- confirm it cannot block the event loop for long.
    service = RAGService(settings=get_settings())
    records = service.list_chunks(document_id)
    return [to_chunk_info(record) for record in records]
@router.delete("/documents/{document_id}", response_model=DeleteResponse)
async def delete_document(document_id: str):
    """Delete a document through the RAG service and report the outcome.

    Args:
        document_id: Document identifier taken from the URL path.

    Returns:
        DeleteResponse: ``deleted=True`` with a message that includes the
        number of chunks the service reported as removed.

    Raises:
        HTTPException: 404 when the service reports the deletion as
            unsuccessful.
    """
    # Settings are resolved at call time rather than at module import.
    from app.core.config import get_settings

    # NOTE(review): delete_document() is called synchronously from an async
    # handler -- confirm it cannot block the event loop for long.
    service = RAGService(settings=get_settings())
    found, deleted_count = service.delete_document(document_id)

    if found:
        logger.info(
            "Deleted document %s: %d chunks removed", document_id, deleted_count
        )
        return DeleteResponse(
            deleted=True,
            message=f"Deleted document {document_id}: {deleted_count} chunks removed",
        )

    # A falsy success flag from the service is surfaced to the client as 404.
    raise HTTPException(status_code=404, detail=f"Document not found: {document_id}")
@router.delete("/chunks/{chunk_id}", response_model=DeleteResponse)
async def delete_chunk(chunk_id: str):
    """Delete a single chunk through the RAG service and report the outcome.

    Args:
        chunk_id: Chunk identifier taken from the URL path.

    Returns:
        DeleteResponse: ``deleted=True`` with a message naming the chunk.

    Raises:
        HTTPException: 404 when the service reports the deletion as
            unsuccessful.
    """
    # Settings are resolved at call time rather than at module import.
    from app.core.config import get_settings

    # NOTE(review): delete_chunk() is called synchronously from an async
    # handler -- confirm it cannot block the event loop for long.
    service = RAGService(settings=get_settings())
    removed = service.delete_chunk(chunk_id)

    if removed:
        logger.info("Deleted chunk: %s", chunk_id)
        return DeleteResponse(
            deleted=True,
            message=f"Deleted chunk {chunk_id}",
        )

    # A falsy result from the service is surfaced to the client as 404.
    raise HTTPException(status_code=404, detail=f"Chunk not found: {chunk_id}")

View File

@ -0,0 +1,168 @@
"""Phase 1 tests: Documents CRUD endpoints.
Covers:
- GET /documents listing with chunk counts
- GET /documents/{id}/chunks
- DELETE /documents/{id}
- DELETE /chunks/{id}
"""
import pytest
from fastapi.testclient import TestClient
from unittest.mock import MagicMock, patch
class TestDocumentsRouter:
    """Documents CRUD endpoint tests.

    Every test replaces ``RAGService`` as seen by ``app.routers.documents``
    with a ``MagicMock``, so only the router's request/response handling is
    exercised. Besides the HTTP response, each test also checks which service
    method the router called and with what argument, so a handler that
    forwards the wrong identifier is caught.
    """

    @pytest.fixture
    def client(self):
        """Return a ``TestClient`` bound to the FastAPI application."""
        # Imported here so the application module is only loaded when a test
        # actually requests the client.
        from app.main import app

        return TestClient(app)

    @pytest.fixture
    def mock_rag(self):
        """Patch the router's ``RAGService`` and yield the instance it builds.

        The patch stays active for the whole test, so every request made
        through ``client`` is served by the yielded mock.
        """
        with patch("app.routers.documents.RAGService") as mock_rag_class:
            mock_rag = MagicMock()
            mock_rag_class.return_value = mock_rag
            yield mock_rag

    def test_list_documents_empty(self, client, mock_rag):
        """Should return empty list when no documents exist."""
        mock_rag.list_documents.return_value = ([], 0, 0)

        response = client.get("/api/v1/documents")

        assert response.status_code == 200
        data = response.json()
        assert data["documents"] == []
        assert data["total_documents"] == 0
        assert data["total_chunks"] == 0
        mock_rag.list_documents.assert_called_once_with()

    def test_list_documents_with_data(self, client, mock_rag):
        """Should return grouped documents with chunk counts."""
        doc_list = [
            {
                "document_id": "abc-123",
                "filename": "report.pdf",
                "chunk_count": 3,
                "upload_date": "2026-04-23",
            },
            {
                "document_id": "def-456",
                "filename": "notes.txt",
                "chunk_count": 1,
                "upload_date": "2026-04-22",
            },
        ]
        mock_rag.list_documents.return_value = (doc_list, 2, 4)

        response = client.get("/api/v1/documents")

        assert response.status_code == 200
        data = response.json()
        assert data["total_documents"] == 2
        assert data["total_chunks"] == 4
        assert len(data["documents"]) == 2
        assert data["documents"][0]["document_id"] == "abc-123"
        assert data["documents"][0]["filename"] == "report.pdf"
        assert data["documents"][0]["chunk_count"] == 3
        # The second entry must survive the mapping too, in the same order.
        assert data["documents"][1]["document_id"] == "def-456"
        assert data["documents"][1]["filename"] == "notes.txt"
        assert data["documents"][1]["chunk_count"] == 1
        mock_rag.list_documents.assert_called_once_with()

    def test_list_chunks_for_document(self, client, mock_rag):
        """Should return all chunks for a given document_id."""
        chunks = [
            {
                "chunk_id": "abc-123_0",
                "chunk_index": 0,
                "content_summary": "First chunk summary",
                "page_number": 1,
                "chunk_file_path": None,
            },
            {
                "chunk_id": "abc-123_1",
                "chunk_index": 1,
                "content_summary": "Second chunk summary",
                "page_number": 2,
                "chunk_file_path": None,
            },
        ]
        mock_rag.list_chunks.return_value = chunks

        response = client.get("/api/v1/documents/abc-123/chunks")

        assert response.status_code == 200
        data = response.json()
        assert len(data) == 2
        assert data[0]["chunk_id"] == "abc-123_0"
        assert data[0]["chunk_index"] == 0
        assert data[0]["content_summary"] == "First chunk summary"
        assert data[1]["chunk_index"] == 1
        # The path parameter must reach the service unchanged.
        mock_rag.list_chunks.assert_called_once_with("abc-123")

    def test_list_chunks_document_not_found(self, client, mock_rag):
        """Should return empty list for nonexistent document."""
        mock_rag.list_chunks.return_value = []

        response = client.get("/api/v1/documents/nonexistent-id/chunks")

        assert response.status_code == 200
        data = response.json()
        assert data == []
        mock_rag.list_chunks.assert_called_once_with("nonexistent-id")

    def test_delete_document_success(self, client, mock_rag):
        """Should delete all chunks for a document and return confirmation."""
        mock_rag.delete_document.return_value = (True, 3)

        response = client.delete("/api/v1/documents/abc-123")

        assert response.status_code == 200
        data = response.json()
        assert data["deleted"] is True
        assert "3 chunks removed" in data["message"]
        # Deleting the wrong document would be destructive, so pin the id.
        mock_rag.delete_document.assert_called_once_with("abc-123")

    def test_delete_document_not_found(self, client, mock_rag):
        """Should return 404 for nonexistent document."""
        mock_rag.delete_document.return_value = (False, 0)

        response = client.delete("/api/v1/documents/nonexistent-id")

        assert response.status_code == 404
        assert "not found" in response.json()["detail"].lower()
        mock_rag.delete_document.assert_called_once_with("nonexistent-id")

    def test_delete_chunk_success(self, client, mock_rag):
        """Should delete a single chunk and return confirmation."""
        mock_rag.delete_chunk.return_value = True

        response = client.delete("/api/v1/chunks/abc-123_0")

        assert response.status_code == 200
        data = response.json()
        assert data["deleted"] is True
        assert "abc-123_0" in data["message"]
        # Only the chunk-level delete may run for this route.
        mock_rag.delete_chunk.assert_called_once_with("abc-123_0")
        mock_rag.delete_document.assert_not_called()

    def test_delete_chunk_not_found(self, client, mock_rag):
        """Should return 404 for nonexistent chunk."""
        mock_rag.delete_chunk.return_value = False

        response = client.delete("/api/v1/chunks/nonexistent-chunk")

        assert response.status_code == 404
        assert "not found" in response.json()["detail"].lower()
        mock_rag.delete_chunk.assert_called_once_with("nonexistent-chunk")