feat(backend): add documents CRUD endpoints and tests
Add 4 REST endpoints for RAG database management: GET /documents, GET /documents/{id}/chunks, DELETE /documents/{id}, DELETE /chunks/{id}. Register documents router in main.py. 8 unit tests covering all CRUD operations.
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)
Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
parent
178461915a
commit
f21085b3df
|
|
@ -6,7 +6,7 @@ from pathlib import Path
|
|||
from fastapi import FastAPI
|
||||
from fastapi.middleware.cors import CORSMiddleware
|
||||
|
||||
from app.routers import ingest, query
|
||||
from app.routers import ingest, query, documents
|
||||
from app.core.config import get_settings
|
||||
|
||||
# Configure logging before app initialization
|
||||
|
|
@ -44,6 +44,7 @@ app.add_middleware(
|
|||
|
||||
app.include_router(ingest.router, prefix="/api/v1")
|
||||
app.include_router(query.router, prefix="/api/v1")
|
||||
app.include_router(documents.router, prefix="/api/v1")
|
||||
|
||||
|
||||
@app.get("/health")
|
||||
|
|
|
|||
|
|
@ -0,0 +1,102 @@
|
|||
"""Documents CRUD router for RAG Database management."""
|
||||
import logging
|
||||
|
||||
from fastapi import APIRouter, HTTPException
|
||||
|
||||
from app.models.documents import (
|
||||
DocumentInfo,
|
||||
ChunkInfo,
|
||||
DocumentListResponse,
|
||||
DeleteResponse,
|
||||
)
|
||||
from app.services.rag import RAGService
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
router = APIRouter(tags=["documents"])
|
||||
|
||||
|
||||
@router.get("/documents", response_model=DocumentListResponse)
async def list_documents():
    """List the stored documents along with aggregate totals.

    Builds a RAGService from the current settings and calls its
    ``list_documents`` method, which yields a 3-tuple of
    (document entries, total document count, total chunk count).
    Each entry is a mapping that is converted into a DocumentInfo.

    Returns:
        A DocumentListResponse carrying the converted documents and the two
        totals exactly as reported by the service.
    """
    # Imported at call time, as in the rest of this router.
    from app.core.config import get_settings

    # NOTE(review): the service is called synchronously inside an async
    # handler; if RAGService blocks on I/O this may stall the event loop.
    # Confirm whether a plain `def` handler would be more appropriate.
    service = RAGService(settings=get_settings())
    entries, document_total, chunk_total = service.list_documents()

    infos = []
    for entry in entries:
        infos.append(
            DocumentInfo(
                document_id=entry["document_id"],
                filename=entry["filename"],
                chunk_count=entry["chunk_count"],
                upload_date=entry["upload_date"],
            )
        )

    return DocumentListResponse(
        documents=infos,
        total_documents=document_total,
        total_chunks=chunk_total,
    )
|
||||
|
||||
|
||||
@router.get("/documents/{document_id}/chunks", response_model=list[ChunkInfo])
async def list_chunks(document_id: str):
    """Return the chunks the RAG service reports for ``document_id``.

    Args:
        document_id: Identifier taken from the URL path and passed unchanged
            to ``RAGService.list_chunks``.

    Returns:
        A list of ChunkInfo objects, one per mapping returned by the service.
        This handler raises no 404 itself: whatever the service returns,
        including an empty sequence, is passed through.
    """
    # Imported at call time, as in the rest of this router.
    from app.core.config import get_settings

    service = RAGService(settings=get_settings())

    def to_chunk_info(record):
        # page_number / chunk_file_path are optional keys; missing -> None.
        return ChunkInfo(
            chunk_id=record["chunk_id"],
            chunk_index=record["chunk_index"],
            content_summary=record["content_summary"],
            page_number=record.get("page_number"),
            chunk_file_path=record.get("chunk_file_path"),
        )

    return [to_chunk_info(record) for record in service.list_chunks(document_id)]
|
||||
|
||||
|
||||
@router.delete("/documents/{document_id}", response_model=DeleteResponse)
async def delete_document(document_id: str):
    """Delete a document through the RAG service and report the outcome.

    Args:
        document_id: Identifier taken from the URL path and passed unchanged
            to ``RAGService.delete_document``.

    Returns:
        A DeleteResponse with ``deleted=True`` and a message that includes
        the number of chunks the service reported as removed.

    Raises:
        HTTPException: 404 when the service reports a falsy success flag.
    """
    # Imported at call time, as in the rest of this router.
    from app.core.config import get_settings

    service = RAGService(settings=get_settings())
    success, deleted_count = service.delete_document(document_id)

    if success:
        logger.info("Deleted document %s: %d chunks removed", document_id, deleted_count)
        return DeleteResponse(
            deleted=True,
            message=f"Deleted document {document_id}: {deleted_count} chunks removed",
        )

    raise HTTPException(status_code=404, detail=f"Document not found: {document_id}")
|
||||
|
||||
|
||||
@router.delete("/chunks/{chunk_id}", response_model=DeleteResponse)
async def delete_chunk(chunk_id: str):
    """Delete a single chunk through the RAG service and report the outcome.

    Args:
        chunk_id: Identifier taken from the URL path and passed unchanged to
            ``RAGService.delete_chunk``.

    Returns:
        A DeleteResponse with ``deleted=True`` and a message naming the chunk.

    Raises:
        HTTPException: 404 when the service returns a falsy result.
    """
    # Imported at call time, as in the rest of this router.
    from app.core.config import get_settings

    service = RAGService(settings=get_settings())
    removed = service.delete_chunk(chunk_id)

    if removed:
        logger.info("Deleted chunk: %s", chunk_id)
        return DeleteResponse(
            deleted=True,
            message=f"Deleted chunk {chunk_id}",
        )

    raise HTTPException(status_code=404, detail=f"Chunk not found: {chunk_id}")
|
||||
|
|
@ -0,0 +1,168 @@
|
|||
"""Phase 1 tests: Documents CRUD endpoints.
|
||||
|
||||
Covers:
|
||||
- GET /documents listing with chunk counts
|
||||
- GET /documents/{id}/chunks
|
||||
- DELETE /documents/{id}
|
||||
- DELETE /chunks/{id}
|
||||
"""
|
||||
import pytest
|
||||
from fastapi.testclient import TestClient
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
|
||||
class TestDocumentsRouter:
    """HTTP-level tests for the documents router with RAGService patched out."""

    # Patch target: the RAGService name as looked up by the router module.
    _RAG_PATH = "app.routers.documents.RAGService"

    @staticmethod
    def _stub_rag(rag_cls, method_name, result):
        """Make ``rag_cls()`` return a mock whose ``method_name`` returns ``result``."""
        stub = MagicMock()
        getattr(stub, method_name).return_value = result
        rag_cls.return_value = stub

    @pytest.fixture
    def client(self):
        """Provide a TestClient bound to the FastAPI application."""
        from app.main import app

        return TestClient(app)

    def test_list_documents_empty(self, client):
        """An empty store yields an empty listing and zeroed totals."""
        with patch(self._RAG_PATH) as rag_cls:
            self._stub_rag(rag_cls, "list_documents", ([], 0, 0))
            response = client.get("/api/v1/documents")

        assert response.status_code == 200
        payload = response.json()
        assert payload["documents"] == []
        assert payload["total_documents"] == 0
        assert payload["total_chunks"] == 0

    def test_list_documents_with_data(self, client):
        """Documents come back with their chunk counts and the totals."""
        report = {
            "document_id": "abc-123",
            "filename": "report.pdf",
            "chunk_count": 3,
            "upload_date": "2026-04-23",
        }
        notes = {
            "document_id": "def-456",
            "filename": "notes.txt",
            "chunk_count": 1,
            "upload_date": "2026-04-22",
        }

        with patch(self._RAG_PATH) as rag_cls:
            self._stub_rag(rag_cls, "list_documents", ([report, notes], 2, 4))
            response = client.get("/api/v1/documents")

        assert response.status_code == 200
        payload = response.json()
        assert payload["total_documents"] == 2
        assert payload["total_chunks"] == 4
        assert len(payload["documents"]) == 2
        first = payload["documents"][0]
        assert first["document_id"] == "abc-123"
        assert first["filename"] == "report.pdf"
        assert first["chunk_count"] == 3

    def test_list_chunks_for_document(self, client):
        """Every chunk the service reports for the document is returned."""
        stored_chunks = [
            {
                "chunk_id": "abc-123_0",
                "chunk_index": 0,
                "content_summary": "First chunk summary",
                "page_number": 1,
                "chunk_file_path": None,
            },
            {
                "chunk_id": "abc-123_1",
                "chunk_index": 1,
                "content_summary": "Second chunk summary",
                "page_number": 2,
                "chunk_file_path": None,
            },
        ]

        with patch(self._RAG_PATH) as rag_cls:
            self._stub_rag(rag_cls, "list_chunks", stored_chunks)
            response = client.get("/api/v1/documents/abc-123/chunks")

        assert response.status_code == 200
        payload = response.json()
        assert len(payload) == 2
        assert payload[0]["chunk_id"] == "abc-123_0"
        assert payload[0]["chunk_index"] == 0
        assert payload[0]["content_summary"] == "First chunk summary"
        assert payload[1]["chunk_index"] == 1

    def test_list_chunks_document_not_found(self, client):
        """An unknown document id produces 200 with an empty list."""
        with patch(self._RAG_PATH) as rag_cls:
            self._stub_rag(rag_cls, "list_chunks", [])
            response = client.get("/api/v1/documents/nonexistent-id/chunks")

        assert response.status_code == 200
        payload = response.json()
        assert payload == []

    def test_delete_document_success(self, client):
        """Deleting a document confirms it and reports the chunk count."""
        with patch(self._RAG_PATH) as rag_cls:
            self._stub_rag(rag_cls, "delete_document", (True, 3))
            response = client.delete("/api/v1/documents/abc-123")

        assert response.status_code == 200
        payload = response.json()
        assert payload["deleted"] is True
        assert "3 chunks removed" in payload["message"]

    def test_delete_document_not_found(self, client):
        """Deleting an unknown document yields a 404."""
        with patch(self._RAG_PATH) as rag_cls:
            self._stub_rag(rag_cls, "delete_document", (False, 0))
            response = client.delete("/api/v1/documents/nonexistent-id")

        assert response.status_code == 404
        assert "not found" in response.json()["detail"].lower()

    def test_delete_chunk_success(self, client):
        """Deleting one chunk confirms it and names the chunk."""
        with patch(self._RAG_PATH) as rag_cls:
            self._stub_rag(rag_cls, "delete_chunk", True)
            response = client.delete("/api/v1/chunks/abc-123_0")

        assert response.status_code == 200
        payload = response.json()
        assert payload["deleted"] is True
        assert "abc-123_0" in payload["message"]

    def test_delete_chunk_not_found(self, client):
        """Deleting an unknown chunk yields a 404."""
        with patch(self._RAG_PATH) as rag_cls:
            self._stub_rag(rag_cls, "delete_chunk", False)
            response = client.delete("/api/v1/chunks/nonexistent-chunk")

        assert response.status_code == 404
        assert "not found" in response.json()["detail"].lower()
|
||||
Loading…
Reference in New Issue