"""Phase 1 tests: Document ingestion endpoint.

Covers:
- POST /api/v1/ingest with valid documents
- Metadata extraction (filename, upload_date, content_summary)
- ChromaDB persistence with embeddings
- Error handling for unsupported file types
"""

import io
from unittest.mock import MagicMock, patch

import pytest
from fastapi.testclient import TestClient


class TestIngest:
    """Document upload and ChromaDB ingestion tests.

    The success-path tests patch the RAG service class, the format-specific
    parser, the chunking strategy class and the metadata extractor before
    posting a file to ``/api/v1/ingest``, then check the JSON response.

    NOTE(review): every patch targets the module that *defines* the name
    (e.g. ``app.utils.pdf_parser.parse_pdf``). If the endpoint module binds
    these names with ``from ... import ...``, the patches must target the
    importing module instead -- confirm against the router implementation.
    """

    @pytest.fixture
    def client(self):
        """Return a ``TestClient`` wrapping the ``app`` object from ``app.main``.

        No dependencies are mocked here; each test installs its own patches.
        The application is imported inside the fixture rather than at module
        import time.
        """
        from app.main import app

        return TestClient(app)

    def test_ingest_pdf_success(self, client, tmp_path):
        """Should ingest PDF and return document ID with metadata.

        ``tmp_path`` is requested but not used by the test body.
        """
        rag_service = MagicMock()
        rag_service.ingest_document.return_value = "doc-123"
        chunker = MagicMock()
        chunker.chunk.return_value = ["chunk 1", "chunk 2"]
        chunk_metadata = [
            {"filename": "test.pdf", "chunk_index": 0},
            {"filename": "test.pdf", "chunk_index": 1},
        ]

        # Patchers are configured up front via ``return_value=`` and entered
        # left to right in one flat ``with`` (RAGService first, metadata last).
        rag_patch = patch("app.services.rag.RAGService", return_value=rag_service)
        parser_patch = patch(
            "app.utils.pdf_parser.parse_pdf",
            return_value="Parsed PDF text content",
        )
        chunker_patch = patch(
            "app.utils.chunking.TokenChunkingStrategy", return_value=chunker
        )
        metadata_patch = patch(
            "app.utils.metadata.extract_metadata", return_value=chunk_metadata
        )
        upload = ("test.pdf", io.BytesIO(b"%PDF-1.4"), "application/pdf")

        with rag_patch, parser_patch, chunker_patch, metadata_patch:
            response = client.post("/api/v1/ingest", files={"file": upload})

        assert response.status_code == 200
        data = response.json()
        assert "document_id" in data
        assert data["chunk_count"] == 2
        assert data["filename"] == "test.pdf"

    def test_ingest_docx_success(self, client, tmp_path):
        """Should ingest DOCX and return document ID with metadata.

        ``tmp_path`` is requested but not used by the test body.
        """
        rag_service = MagicMock()
        rag_service.ingest_document.return_value = "doc-456"
        chunker = MagicMock()
        chunker.chunk.return_value = ["chunk 1"]
        chunk_metadata = [{"filename": "test.docx", "chunk_index": 0}]

        # Same flat patching scheme as the PDF test, with the DOCX parser
        # swapped in for the PDF parser.
        rag_patch = patch("app.services.rag.RAGService", return_value=rag_service)
        parser_patch = patch(
            "app.utils.docx_parser.parse_docx",
            return_value="Parsed DOCX text content",
        )
        chunker_patch = patch(
            "app.utils.chunking.TokenChunkingStrategy", return_value=chunker
        )
        metadata_patch = patch(
            "app.utils.metadata.extract_metadata", return_value=chunk_metadata
        )
        upload = (
            "test.docx",
            io.BytesIO(b"docx content"),
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        )

        with rag_patch, parser_patch, chunker_patch, metadata_patch:
            response = client.post("/api/v1/ingest", files={"file": upload})

        assert response.status_code == 200
        data = response.json()
        assert data["chunk_count"] == 1
        assert data["filename"] == "test.docx"

    def test_ingest_unsupported_format(self, client):
        """Should reject unsupported file formats."""
        upload = ("test.jpg", io.BytesIO(b"image data"), "image/jpeg")

        response = client.post("/api/v1/ingest", files={"file": upload})

        assert response.status_code == 400
        # Case-insensitive match so the check does not depend on the exact
        # capitalisation of the error message.
        assert "unsupported" in response.json()["detail"].lower()

    def test_ingest_no_file(self, client):
        """Should reject request without file."""
        response = client.post("/api/v1/ingest")

        assert response.status_code == 422