feat(backend): add documents CRUD endpoints and tests

Add four REST endpoints for RAG database management: GET /documents, GET /documents/{id}/chunks, DELETE /documents/{id}, and DELETE /chunks/{id}. Register the documents router in main.py. Add 8 unit tests covering all four endpoints.

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
Woody 2026-04-23 19:02:28 +08:00
parent 178461915a
commit f21085b3df
3 changed files with 272 additions and 1 deletions

View File

@ -6,7 +6,7 @@ from pathlib import Path
from fastapi import FastAPI from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
from app.routers import ingest, query from app.routers import ingest, query, documents
from app.core.config import get_settings from app.core.config import get_settings
# Configure logging before app initialization # Configure logging before app initialization
@ -44,6 +44,7 @@ app.add_middleware(
app.include_router(ingest.router, prefix="/api/v1") app.include_router(ingest.router, prefix="/api/v1")
app.include_router(query.router, prefix="/api/v1") app.include_router(query.router, prefix="/api/v1")
app.include_router(documents.router, prefix="/api/v1")
@app.get("/health") @app.get("/health")

View File

@ -0,0 +1,102 @@
"""Documents CRUD router for RAG Database management."""
import logging
from fastapi import APIRouter, HTTPException
from app.models.documents import (
DocumentInfo,
ChunkInfo,
DocumentListResponse,
DeleteResponse,
)
from app.services.rag import RAGService
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Router that collects the endpoints below; they are tagged "documents".
router = APIRouter(tags=["documents"])
@router.get("/documents", response_model=DocumentListResponse)
async def list_documents():
    """List the documents reported by the RAG service.

    Builds a ``RAGService`` from the current settings, calls its
    ``list_documents()`` method, and wraps every returned entry in a
    ``DocumentInfo``.

    Returns:
        DocumentListResponse: the converted entries together with the
        document and chunk totals exactly as the service reported them.
    """
    from app.core.config import get_settings

    service = RAGService(settings=get_settings())
    entries, document_total, chunk_total = service.list_documents()

    # Entries are subscripted by key, so all four keys are required.
    infos = []
    for entry in entries:
        infos.append(
            DocumentInfo(
                document_id=entry["document_id"],
                filename=entry["filename"],
                chunk_count=entry["chunk_count"],
                upload_date=entry["upload_date"],
            )
        )

    return DocumentListResponse(
        documents=infos,
        total_documents=document_total,
        total_chunks=chunk_total,
    )
@router.get("/documents/{document_id}/chunks", response_model=list[ChunkInfo])
async def list_chunks(document_id: str):
    """List the chunks the RAG service returns for one document.

    Args:
        document_id: Identifier taken from the URL path and passed
            unchanged to ``RAGService.list_chunks``.

    Returns:
        list[ChunkInfo]: one item per record returned by the service, in
        the same order. No 404 is raised here; an empty result from the
        service produces an empty list.
    """
    from app.core.config import get_settings

    service = RAGService(settings=get_settings())

    result = []
    for record in service.list_chunks(document_id):
        result.append(
            ChunkInfo(
                chunk_id=record["chunk_id"],
                chunk_index=record["chunk_index"],
                content_summary=record["content_summary"],
                # These two are read with .get(), so they become None
                # when the record does not carry them.
                page_number=record.get("page_number"),
                chunk_file_path=record.get("chunk_file_path"),
            )
        )
    return result
@router.delete("/documents/{document_id}", response_model=DeleteResponse)
async def delete_document(document_id: str):
    """Delete a document through the RAG service.

    Args:
        document_id: Identifier taken from the URL path and passed
            unchanged to ``RAGService.delete_document``.

    Returns:
        DeleteResponse: ``deleted=True`` and a message that includes the
        chunk count reported by the service.

    Raises:
        HTTPException: 404 when the service reports a falsy success flag.
    """
    from app.core.config import get_settings

    service = RAGService(settings=get_settings())
    found, deleted_count = service.delete_document(document_id)

    if found:
        logger.info(
            "Deleted document %s: %d chunks removed", document_id, deleted_count
        )
        return DeleteResponse(
            deleted=True,
            message=f"Deleted document {document_id}: {deleted_count} chunks removed",
        )

    raise HTTPException(
        status_code=404, detail=f"Document not found: {document_id}"
    )
@router.delete("/chunks/{chunk_id}", response_model=DeleteResponse)
async def delete_chunk(chunk_id: str):
    """Delete a single chunk through the RAG service.

    Args:
        chunk_id: Identifier taken from the URL path and passed unchanged
            to ``RAGService.delete_chunk``.

    Returns:
        DeleteResponse: ``deleted=True`` and a message naming the chunk.

    Raises:
        HTTPException: 404 when the service returns a falsy result.
    """
    from app.core.config import get_settings

    service = RAGService(settings=get_settings())
    removed = service.delete_chunk(chunk_id)

    if removed:
        logger.info("Deleted chunk: %s", chunk_id)
        return DeleteResponse(
            deleted=True,
            message=f"Deleted chunk {chunk_id}",
        )

    raise HTTPException(status_code=404, detail=f"Chunk not found: {chunk_id}")

View File

@ -0,0 +1,168 @@
"""Phase 1 tests: Documents CRUD endpoints.
Covers:
- GET /documents listing with chunk counts
- GET /documents/{id}/chunks
- DELETE /documents/{id}
- DELETE /chunks/{id}
"""
import pytest
from fastapi.testclient import TestClient
from unittest.mock import MagicMock, patch
class TestDocumentsRouter:
    """Endpoint tests for the documents router with ``RAGService`` patched."""

    @pytest.fixture
    def client(self):
        """Return a ``TestClient`` bound to the FastAPI application."""
        from app.main import app

        return TestClient(app)

    @pytest.fixture
    def rag(self):
        """Patch ``RAGService`` in the router module and yield the mock instance.

        The patch stays active for the whole test, so every request made
        through ``client`` sees the yielded mock as its service object.
        """
        with patch("app.routers.documents.RAGService") as rag_cls:
            service = MagicMock()
            rag_cls.return_value = service
            yield service

    def test_list_documents_empty(self, client, rag):
        """An empty service listing gives an empty payload with zero totals."""
        rag.list_documents.return_value = ([], 0, 0)

        response = client.get("/api/v1/documents")

        assert response.status_code == 200
        body = response.json()
        assert body["documents"] == []
        assert body["total_documents"] == 0
        assert body["total_chunks"] == 0

    def test_list_documents_with_data(self, client, rag):
        """Documents from the service are returned with their chunk counts."""
        report = {
            "document_id": "abc-123",
            "filename": "report.pdf",
            "chunk_count": 3,
            "upload_date": "2026-04-23",
        }
        notes = {
            "document_id": "def-456",
            "filename": "notes.txt",
            "chunk_count": 1,
            "upload_date": "2026-04-22",
        }
        rag.list_documents.return_value = ([report, notes], 2, 4)

        response = client.get("/api/v1/documents")

        assert response.status_code == 200
        body = response.json()
        assert body["total_documents"] == 2
        assert body["total_chunks"] == 4
        assert len(body["documents"]) == 2
        first = body["documents"][0]
        assert first["document_id"] == "abc-123"
        assert first["filename"] == "report.pdf"
        assert first["chunk_count"] == 3

    def test_list_chunks_for_document(self, client, rag):
        """Every chunk the service returns for the document is listed."""
        rag.list_chunks.return_value = [
            {
                "chunk_id": "abc-123_0",
                "chunk_index": 0,
                "content_summary": "First chunk summary",
                "page_number": 1,
                "chunk_file_path": None,
            },
            {
                "chunk_id": "abc-123_1",
                "chunk_index": 1,
                "content_summary": "Second chunk summary",
                "page_number": 2,
                "chunk_file_path": None,
            },
        ]

        response = client.get("/api/v1/documents/abc-123/chunks")

        assert response.status_code == 200
        body = response.json()
        assert len(body) == 2
        assert body[0]["chunk_id"] == "abc-123_0"
        assert body[0]["chunk_index"] == 0
        assert body[0]["content_summary"] == "First chunk summary"
        assert body[1]["chunk_index"] == 1

    def test_list_chunks_document_not_found(self, client, rag):
        """An unknown document yields 200 with an empty list, not a 404."""
        rag.list_chunks.return_value = []

        response = client.get("/api/v1/documents/nonexistent-id/chunks")

        assert response.status_code == 200
        assert response.json() == []

    def test_delete_document_success(self, client, rag):
        """A successful delete confirms and reports the removed chunk count."""
        rag.delete_document.return_value = (True, 3)

        response = client.delete("/api/v1/documents/abc-123")

        assert response.status_code == 200
        body = response.json()
        assert body["deleted"] is True
        assert "3 chunks removed" in body["message"]

    def test_delete_document_not_found(self, client, rag):
        """Deleting an unknown document returns 404."""
        rag.delete_document.return_value = (False, 0)

        response = client.delete("/api/v1/documents/nonexistent-id")

        assert response.status_code == 404
        assert "not found" in response.json()["detail"].lower()

    def test_delete_chunk_success(self, client, rag):
        """A successful chunk delete confirms and names the chunk."""
        rag.delete_chunk.return_value = True

        response = client.delete("/api/v1/chunks/abc-123_0")

        assert response.status_code == 200
        body = response.json()
        assert body["deleted"] is True
        assert "abc-123_0" in body["message"]

    def test_delete_chunk_not_found(self, client, rag):
        """Deleting an unknown chunk returns 404."""
        rag.delete_chunk.return_value = False

        response = client.delete("/api/v1/chunks/nonexistent-chunk")

        assert response.status_code == 404
        assert "not found" in response.json()["detail"].lower()