# Source: legco_ai_assistant/backend/app/test/test_phase1_chunk_serving.py (74 lines, 3.1 KiB, Python)

"""Tests for chunk PDF file serving endpoint.
Coverage:
- GET /api/v1/chunks/{file_path}/pdf — success, 404, path traversal (400 or 404),
  filenames containing spaces
"""
import os
import tempfile
import unittest
from unittest.mock import patch
from fastapi.testclient import TestClient
from app.main import app
class TestChunkServing(unittest.TestCase):
    """Test GET /api/v1/chunks/{file_path}/pdf endpoint.

    Every test points the mocked settings' ``document_chunk_path`` at a
    directory created under ``tempfile.TemporaryDirectory`` so the suite does
    not depend on (or collide with) anything already present on the host.
    """

    # Minimal payload written to fixture files; only needs to look like a PDF.
    _FAKE_PDF_BYTES = b"%PDF-1.4 fake content"

    def setUp(self):
        """Create a fresh test client for the application before each test."""
        self.client = TestClient(app)

    @classmethod
    def _write_fake_pdf(cls, directory, filename):
        """Write a small fake PDF named *filename* into *directory*.

        Returns:
            The full path of the file that was written.
        """
        pdf_path = os.path.join(directory, filename)
        with open(pdf_path, "wb") as f:
            f.write(cls._FAKE_PDF_BYTES)
        return pdf_path

    def _get_with_chunk_dir(self, chunk_dir, url):
        """Issue ``GET url`` while settings report *chunk_dir* as the chunk path.

        Returns:
            The response object returned by the test client.
        """
        # NOTE(review): this replaces the ``get_settings`` attribute on
        # ``app.core.config`` only. If the endpoint module binds the function
        # directly (``from app.core.config import get_settings``) or receives
        # it through a FastAPI dependency, this patch may not take effect --
        # confirm against the router implementation.
        with patch("app.core.config.get_settings") as mock_settings:
            mock_settings.return_value.document_chunk_path = chunk_dir
            return self.client.get(url)

    def test_get_chunk_pdf_success(self):
        """Should serve chunk PDF file with 200 and application/pdf."""
        with tempfile.TemporaryDirectory() as tmp_dir:
            self._write_fake_pdf(tmp_dir, "test_page_1.pdf")
            response = self._get_with_chunk_dir(
                tmp_dir, "/api/v1/chunks/test_page_1.pdf/pdf"
            )
            self.assertEqual(response.status_code, 200)
            self.assertIn("application/pdf", response.headers["content-type"])

    def test_get_chunk_pdf_not_found(self):
        """Should return 404 for non-existent chunk file."""
        with tempfile.TemporaryDirectory() as tmp_dir:
            # A child of a fresh temp dir is guaranteed not to exist, unlike a
            # hard-coded /tmp path that could be present on some machine.
            missing_dir = os.path.join(tmp_dir, "nonexistent_chunk_dir")
            response = self._get_with_chunk_dir(
                missing_dir, "/api/v1/chunks/nonexistent.pdf/pdf"
            )
            self.assertEqual(response.status_code, 404)

    def test_get_chunk_pdf_path_traversal_double_dot(self):
        """Should reject path traversal with ``..`` (either 400 or 404 is accepted).

        NOTE(review): 404 is accepted because the ``..`` segment is presumably
        normalized away before the handler runs; if so, this request does not
        reach the handler's own traversal check -- confirm.
        """
        with tempfile.TemporaryDirectory() as tmp_dir:
            missing_dir = os.path.join(tmp_dir, "fake_chunk_dir")
            response = self._get_with_chunk_dir(
                missing_dir, "/api/v1/chunks/../etc/passwd/pdf"
            )
            self.assertIn(response.status_code, [400, 404])

    def test_get_chunk_pdf_path_traversal_symlink_escape(self):
        """Should reject a ``../../`` path escaping the base directory (400 or 404).

        Despite the method name, no symlink is created here: the request uses
        two ``..`` segments against an existing (empty) chunk directory. The
        name is kept so existing test selections by name keep working.
        """
        with tempfile.TemporaryDirectory() as tmp_dir:
            response = self._get_with_chunk_dir(
                tmp_dir, "/api/v1/chunks/../../etc/passwd/pdf"
            )
            self.assertIn(response.status_code, [400, 404])

    def test_get_chunk_pdf_with_spaces_in_filename(self):
        """Should serve files with spaces in the filename."""
        with tempfile.TemporaryDirectory() as tmp_dir:
            self._write_fake_pdf(tmp_dir, "NEC4 ACC_page_3.pdf")
            response = self._get_with_chunk_dir(
                tmp_dir, "/api/v1/chunks/NEC4 ACC_page_3.pdf/pdf"
            )
            self.assertEqual(response.status_code, 200)
            self.assertIn("application/pdf", response.headers["content-type"])