"""Tests for chunk PDF file serving endpoint.

Coverage:
- GET /api/v1/chunks/{file_path}/pdf — success, 404, path traversal 400

Uses real filesystem (tmp_path) via monkeypatch.setenv — no mocks on
internal services.
"""

import os

import pytest
from fastapi.testclient import TestClient


@pytest.fixture
def client(tmp_path, monkeypatch):
    """Yield a TestClient whose DOCUMENT_CHUNK_PATH is a temp directory.

    Creates ``tmp_path / "chunks"`` and points ``DOCUMENT_CHUNK_PATH`` at it,
    and points ``CHROMA_DB_PATH`` at ``tmp_path / "chroma_test"``. Tests write
    their fixture files into the same ``tmp_path / "chunks"`` directory.
    """
    chunk_dir = tmp_path / "chunks"
    chunk_dir.mkdir()
    monkeypatch.setenv("DOCUMENT_CHUNK_PATH", str(chunk_dir))
    monkeypatch.setenv("CHROMA_DB_PATH", str(tmp_path / "chroma_test"))

    # Imported only after the environment is patched; presumably so that the
    # settings object is built from the patched variables.
    from app.core.config import get_settings

    # Drop any settings cached before the environment was patched.
    get_settings.cache_clear()

    from app.main import app

    yield TestClient(app)

    # Teardown: do not leak settings built from this test's temp paths.
    get_settings.cache_clear()


def test_get_chunk_pdf_success(client, tmp_path):
    """Should serve chunk PDF file with 200 and application/pdf."""
    chunk_dir = tmp_path / "chunks"
    test_file = chunk_dir / "test_page_1.pdf"
    test_file.write_bytes(b"%PDF-1.4 fake content")

    response = client.get("/api/v1/chunks/test_page_1.pdf/pdf")

    assert response.status_code == 200
    assert "application/pdf" in response.headers["content-type"]


def test_get_chunk_pdf_not_found(client):
    """Should return 404 for non-existent chunk file."""
    response = client.get("/api/v1/chunks/nonexistent.pdf/pdf")

    assert response.status_code == 404


def test_get_chunk_pdf_path_traversal_double_dot(client):
    """Should reject path traversal with a single ``..`` segment.

    Either 400 or 404 is accepted.
    NOTE(review): the ``..`` segment may be collapsed by the HTTP client or
    the routing layer before the handler runs, in which case the 404 comes
    from routing rather than from the endpoint's own check — confirm.
    """
    response = client.get("/api/v1/chunks/../etc/passwd/pdf")

    assert response.status_code in (400, 404)


def test_get_chunk_pdf_path_traversal_symlink_escape(client):
    """Should reject resolved path escaping base directory (400 or 404).

    Despite the name, no symlink is created here; the request uses a
    two-level ``../../`` path instead.
    """
    response = client.get("/api/v1/chunks/../../etc/passwd/pdf")

    assert response.status_code in (400, 404)


def test_get_chunk_pdf_with_spaces_in_filename(client, tmp_path):
    """Should serve files with spaces in the filename."""
    chunk_dir = tmp_path / "chunks"
    test_file = chunk_dir / "NEC4 ACC_page_3.pdf"
    test_file.write_bytes(b"%PDF-1.4 fake content")

    # The space is passed unencoded on purpose; encoding is left to the client.
    response = client.get("/api/v1/chunks/NEC4 ACC_page_3.pdf/pdf")

    assert response.status_code == 200
    assert "application/pdf" in response.headers["content-type"]