From f21085b3df7578c7ab674c0765bd7bc6ed1f86f8 Mon Sep 17 00:00:00 2001
From: Woody
Date: Thu, 23 Apr 2026 19:02:28 +0800
Subject: [PATCH] feat(backend): add documents CRUD endpoints and tests

Add 4 REST endpoints for RAG database management:
GET /documents, GET /documents/{id}/chunks, DELETE /documents/{id},
DELETE /chunks/{id}. Register documents router in main.py.
8 unit tests covering all CRUD operations.

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus
---
 backend/app/main.py                           |   3 +-
 backend/app/routers/documents.py              |  99 +++++++++++
 .../app/test/test_phase1_documents_router.py  | 168 ++++++++++++++++++
 3 files changed, 269 insertions(+), 1 deletion(-)
 create mode 100644 backend/app/routers/documents.py
 create mode 100644 backend/app/test/test_phase1_documents_router.py

diff --git a/backend/app/main.py b/backend/app/main.py
index 8ddb909..e630eb6 100644
--- a/backend/app/main.py
+++ b/backend/app/main.py
@@ -6,7 +6,7 @@ from pathlib import Path
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 
-from app.routers import ingest, query
+from app.routers import ingest, query, documents
 from app.core.config import get_settings
 
 # Configure logging before app initialization
@@ -44,6 +44,7 @@ app.add_middleware(
 
 app.include_router(ingest.router, prefix="/api/v1")
 app.include_router(query.router, prefix="/api/v1")
+app.include_router(documents.router, prefix="/api/v1")
 
 
 @app.get("/health")
diff --git a/backend/app/routers/documents.py b/backend/app/routers/documents.py
new file mode 100644
index 0000000..a9d6240
--- /dev/null
+++ b/backend/app/routers/documents.py
@@ -0,0 +1,99 @@
+"""Documents CRUD router for RAG Database management."""
+import logging
+
+from fastapi import APIRouter, HTTPException
+
+from app.models.documents import (
+    DocumentInfo,
+    ChunkInfo,
+    DocumentListResponse,
+    DeleteResponse,
+)
+from app.services.rag import RAGService
+
+logger = logging.getLogger(__name__)
+router = APIRouter(tags=["documents"])
+
+
+def _get_rag_service():
+    """Build a RAGService from the current application settings."""
+    # Function-local import, kept from the original per-handler code.
+    from app.core.config import get_settings
+
+    return RAGService(settings=get_settings())
+
+
+# The handlers below are plain ``def`` rather than ``async def``: they only
+# make synchronous RAGService calls, and FastAPI runs ``def`` path operations
+# in its threadpool instead of on the event loop.
+
+
+@router.get("/documents", response_model=DocumentListResponse)
+def list_documents():
+    """List all documents with their chunk counts and overall totals."""
+    doc_list, total_docs, total_chunks = _get_rag_service().list_documents()
+
+    documents = [
+        DocumentInfo(
+            document_id=d["document_id"],
+            filename=d["filename"],
+            chunk_count=d["chunk_count"],
+            upload_date=d["upload_date"],
+        )
+        for d in doc_list
+    ]
+
+    return DocumentListResponse(
+        documents=documents,
+        total_documents=total_docs,
+        total_chunks=total_chunks,
+    )
+
+
+@router.get("/documents/{document_id}/chunks", response_model=list[ChunkInfo])
+def list_chunks(document_id: str):
+    """List the chunks stored for ``document_id``."""
+    chunks = _get_rag_service().list_chunks(document_id)
+
+    return [
+        ChunkInfo(
+            chunk_id=c["chunk_id"],
+            chunk_index=c["chunk_index"],
+            content_summary=c["content_summary"],
+            page_number=c.get("page_number"),
+            chunk_file_path=c.get("chunk_file_path"),
+        )
+        for c in chunks
+    ]
+
+
+@router.delete("/documents/{document_id}", response_model=DeleteResponse)
+def delete_document(document_id: str):
+    """Delete a document and its chunks; respond 404 if it is not found."""
+    success, deleted_count = _get_rag_service().delete_document(document_id)
+
+    if not success:
+        raise HTTPException(status_code=404, detail=f"Document not found: {document_id}")
+
+    logger.info("Deleted document %s: %d chunks removed", document_id, deleted_count)
+
+    return DeleteResponse(
+        deleted=True,
+        message=f"Deleted document {document_id}: {deleted_count} chunks removed",
+    )
+
+
+@router.delete("/chunks/{chunk_id}", response_model=DeleteResponse)
+def delete_chunk(chunk_id: str):
+    """Delete a single chunk; respond 404 if it is not found."""
+    success = _get_rag_service().delete_chunk(chunk_id)
+
+    if not success:
+        raise HTTPException(status_code=404, detail=f"Chunk not found: {chunk_id}")
+
+    logger.info("Deleted chunk: %s", chunk_id)
+
+    return DeleteResponse(
+        deleted=True,
+        message=f"Deleted chunk {chunk_id}",
+    )
diff --git a/backend/app/test/test_phase1_documents_router.py b/backend/app/test/test_phase1_documents_router.py
new file mode 100644
index 0000000..2601925
--- /dev/null
+++ b/backend/app/test/test_phase1_documents_router.py
@@ -0,0 +1,168 @@
+"""Phase 1 tests: Documents CRUD endpoints.
+
+Covers:
+- GET /documents listing with chunk counts
+- GET /documents/{id}/chunks
+- DELETE /documents/{id}
+- DELETE /chunks/{id}
+"""
+import pytest
+from fastapi.testclient import TestClient
+from unittest.mock import MagicMock, patch
+
+
+class TestDocumentsRouter:
+    """Documents CRUD endpoint tests."""
+
+    @pytest.fixture
+    def client(self):
+        """Create test client with mocked dependencies."""
+        from app.main import app
+        return TestClient(app)
+
+    def test_list_documents_empty(self, client):
+        """Should return empty list when no documents exist."""
+        with patch("app.routers.documents.RAGService") as mock_rag_class:
+            mock_rag = MagicMock()
+            mock_rag.list_documents.return_value = ([], 0, 0)
+            mock_rag_class.return_value = mock_rag
+
+            response = client.get("/api/v1/documents")
+
+        assert response.status_code == 200
+        data = response.json()
+        assert data["documents"] == []
+        assert data["total_documents"] == 0
+        assert data["total_chunks"] == 0
+
+    def test_list_documents_with_data(self, client):
+        """Should return grouped documents with chunk counts."""
+        doc_list = [
+            {
+                "document_id": "abc-123",
+                "filename": "report.pdf",
+                "chunk_count": 3,
+                "upload_date": "2026-04-23",
+            },
+            {
+                "document_id": "def-456",
+                "filename": "notes.txt",
+                "chunk_count": 1,
+                "upload_date": "2026-04-22",
+            },
+        ]
+
+        with patch("app.routers.documents.RAGService") as mock_rag_class:
+            mock_rag = MagicMock()
+            mock_rag.list_documents.return_value = (doc_list, 2, 4)
+            mock_rag_class.return_value = mock_rag
+
+            response = client.get("/api/v1/documents")
+
+        assert response.status_code == 200
+        data = response.json()
+        assert data["total_documents"] == 2
+        assert data["total_chunks"] == 4
+        assert len(data["documents"]) == 2
+        assert data["documents"][0]["document_id"] == "abc-123"
+        assert data["documents"][0]["filename"] == "report.pdf"
+        assert data["documents"][0]["chunk_count"] == 3
+
+    def test_list_chunks_for_document(self, client):
+        """Should return all chunks for a given document_id."""
+        chunks = [
+            {
+                "chunk_id": "abc-123_0",
+                "chunk_index": 0,
+                "content_summary": "First chunk summary",
+                "page_number": 1,
+                "chunk_file_path": None,
+            },
+            {
+                "chunk_id": "abc-123_1",
+                "chunk_index": 1,
+                "content_summary": "Second chunk summary",
+                "page_number": 2,
+                "chunk_file_path": None,
+            },
+        ]
+
+        with patch("app.routers.documents.RAGService") as mock_rag_class:
+            mock_rag = MagicMock()
+            mock_rag.list_chunks.return_value = chunks
+            mock_rag_class.return_value = mock_rag
+
+            response = client.get("/api/v1/documents/abc-123/chunks")
+
+        assert response.status_code == 200
+        data = response.json()
+        assert len(data) == 2
+        assert data[0]["chunk_id"] == "abc-123_0"
+        assert data[0]["chunk_index"] == 0
+        assert data[0]["content_summary"] == "First chunk summary"
+        assert data[1]["chunk_index"] == 1
+
+    def test_list_chunks_document_not_found(self, client):
+        """Should return empty list for nonexistent document."""
+        with patch("app.routers.documents.RAGService") as mock_rag_class:
+            mock_rag = MagicMock()
+            mock_rag.list_chunks.return_value = []
+            mock_rag_class.return_value = mock_rag
+
+            response = client.get("/api/v1/documents/nonexistent-id/chunks")
+
+        assert response.status_code == 200
+        data = response.json()
+        assert data == []
+
+    def test_delete_document_success(self, client):
+        """Should delete all chunks for a document and return confirmation."""
+        with patch("app.routers.documents.RAGService") as mock_rag_class:
+            mock_rag = MagicMock()
+            mock_rag.delete_document.return_value = (True, 3)
+            mock_rag_class.return_value = mock_rag
+
+            response = client.delete("/api/v1/documents/abc-123")
+
+        assert response.status_code == 200
+        data = response.json()
+        assert data["deleted"] is True
+        assert "3 chunks removed" in data["message"]
+
+    def test_delete_document_not_found(self, client):
+        """Should return 404 for nonexistent document."""
+        with patch("app.routers.documents.RAGService") as mock_rag_class:
+            mock_rag = MagicMock()
+            mock_rag.delete_document.return_value = (False, 0)
+            mock_rag_class.return_value = mock_rag
+
+            response = client.delete("/api/v1/documents/nonexistent-id")
+
+        assert response.status_code == 404
+        assert "not found" in response.json()["detail"].lower()
+
+    def test_delete_chunk_success(self, client):
+        """Should delete a single chunk and return confirmation."""
+        with patch("app.routers.documents.RAGService") as mock_rag_class:
+            mock_rag = MagicMock()
+            mock_rag.delete_chunk.return_value = True
+            mock_rag_class.return_value = mock_rag
+
+            response = client.delete("/api/v1/chunks/abc-123_0")
+
+        assert response.status_code == 200
+        data = response.json()
+        assert data["deleted"] is True
+        assert "abc-123_0" in data["message"]
+
+    def test_delete_chunk_not_found(self, client):
+        """Should return 404 for nonexistent chunk."""
+        with patch("app.routers.documents.RAGService") as mock_rag_class:
+            mock_rag = MagicMock()
+            mock_rag.delete_chunk.return_value = False
+            mock_rag_class.return_value = mock_rag
+
+            response = client.delete("/api/v1/chunks/nonexistent-chunk")
+
+        assert response.status_code == 404
+        assert "not found" in response.json()["detail"].lower()