# legco_ai_assistant/backend/app/test/test_phase1_chunk_serving.py
# (file viewer header: 71 lines, 2.3 KiB, Python)

"""Tests for chunk PDF file serving endpoint.
Coverage:
- GET /api/v1/chunks/{file_path}/pdf — success, 404, path traversal (400 or 404)
Uses real filesystem (tmp_path) via monkeypatch.setenv — no mocks on internal services.
"""
import os
import pytest
from fastapi.testclient import TestClient
@pytest.fixture
def client(tmp_path, monkeypatch):
    """Yield a TestClient whose storage paths live under ``tmp_path``.

    ``DOCUMENT_CHUNK_PATH`` is pointed at a freshly created ``chunks``
    directory and ``CHROMA_DB_PATH`` at ``chroma_test`` (not created here).
    The settings cache is cleared before the app is imported and again on
    teardown — presumably so the patched environment is re-read and does not
    leak into later tests.
    """
    chunk_root = tmp_path / "chunks"
    chunk_root.mkdir()

    patched_env = (
        ("DOCUMENT_CHUNK_PATH", chunk_root),
        ("CHROMA_DB_PATH", tmp_path / "chroma_test"),
    )
    for var_name, location in patched_env:
        monkeypatch.setenv(var_name, str(location))

    # Imported only after the environment is patched, as in the original.
    from app.core.config import get_settings

    get_settings.cache_clear()

    from app.main import app

    test_client = TestClient(app)
    yield test_client

    # Teardown: drop the cached settings again.
    get_settings.cache_clear()
def test_get_chunk_pdf_success(client, tmp_path):
    """Should serve an existing chunk PDF with 200, application/pdf and its bytes."""
    payload = b"%PDF-1.4 fake content"
    pdf_path = tmp_path / "chunks" / "test_page_1.pdf"
    pdf_path.write_bytes(payload)

    response = client.get("/api/v1/chunks/test_page_1.pdf/pdf")

    assert response.status_code == 200
    assert "application/pdf" in response.headers["content-type"]
    # Status and content type alone would also pass if the wrong file (or an
    # empty body) were served, so pin the body to what was written.
    assert response.content == payload
def test_get_chunk_pdf_not_found(client):
    """Should answer 404 when the requested chunk file does not exist."""
    missing_url = "/api/v1/chunks/nonexistent.pdf/pdf"
    result = client.get(missing_url)
    assert result.status_code == 404
def test_get_chunk_pdf_path_traversal_double_dot(client):
    """Should reject path traversal with .. (400, or 404 if the path is normalized first).

    The traversal is requested both literally and percent-encoded; either
    form must be answered with 400 or 404.
    """
    traversal_urls = (
        "/api/v1/chunks/../etc/passwd/pdf",
        # NOTE(review): the HTTP client layer presumably collapses literal
        # ".." segments before sending (hence the accepted 404) — confirm.
        # The percent-encoded form is added so the dot segments are not
        # removed as literal path components on the client side.
        "/api/v1/chunks/%2e%2e/etc/passwd/pdf",
    )
    for url in traversal_urls:
        response = client.get(url)
        assert response.status_code in (400, 404), url
def test_get_chunk_pdf_path_traversal_symlink_escape(client):
    """Should reject a path that would escape the base directory (400 or 404).

    NOTE(review): despite the name, no symlink is created here; the request
    simply uses two literal ``..`` segments.
    """
    escape_url = "/api/v1/chunks/../../etc/passwd/pdf"
    allowed_statuses = (400, 404)
    outcome = client.get(escape_url)
    assert outcome.status_code in allowed_statuses
def test_get_chunk_pdf_with_spaces_in_filename(client, tmp_path):
    """Should serve files with spaces in the filename, returning their bytes."""
    payload = b"%PDF-1.4 fake content"
    pdf_path = tmp_path / "chunks" / "NEC4 ACC_page_3.pdf"
    pdf_path.write_bytes(payload)

    response = client.get("/api/v1/chunks/NEC4 ACC_page_3.pdf/pdf")

    assert response.status_code == 200
    assert "application/pdf" in response.headers["content-type"]
    # Confirm the file with the space in its name is the one actually served,
    # not merely that some PDF response came back.
    assert response.content == payload