71 lines
2.3 KiB
Python
71 lines
2.3 KiB
Python
"""Tests for chunk PDF file serving endpoint.
|
|
|
|
Coverage:
|
|
- GET /api/v1/chunks/{file_path}/pdf — success, 404, path traversal 400
|
|
|
|
Uses real filesystem (tmp_path) via monkeypatch.setenv — no mocks on internal services.
|
|
"""
|
|
import os
|
|
|
|
import pytest
|
|
from fastapi.testclient import TestClient
|
|
|
|
|
|
@pytest.fixture
def client(tmp_path, monkeypatch):
    """Yield a TestClient configured to use directories under ``tmp_path``.

    Setup creates ``tmp_path / "chunks"`` and points the
    ``DOCUMENT_CHUNK_PATH`` and ``CHROMA_DB_PATH`` environment variables at
    per-test locations. The ``get_settings`` cache is cleared both before the
    app is imported and again after the test finishes.
    """
    chunks_root = tmp_path / "chunks"
    chunks_root.mkdir()

    env_overrides = {
        "DOCUMENT_CHUNK_PATH": chunks_root,
        "CHROMA_DB_PATH": tmp_path / "chroma_test",
    }
    for var_name, location in env_overrides.items():
        monkeypatch.setenv(var_name, str(location))

    # Project imports are deferred until the environment has been patched;
    # presumably so settings are built from the overridden variables.
    from app.core.config import get_settings

    get_settings.cache_clear()

    from app.main import app

    test_client = TestClient(app)
    yield test_client

    # Teardown: drop cached settings again so they are not reused after this test.
    get_settings.cache_clear()
|
|
|
|
|
|
def test_get_chunk_pdf_success(client, tmp_path):
    """An existing chunk PDF is returned with HTTP 200 and a PDF content type."""
    # Write the file into the same "chunks" directory the client fixture creates.
    pdf_path = tmp_path / "chunks" / "test_page_1.pdf"
    pdf_path.write_bytes(b"%PDF-1.4 fake content")

    resp = client.get("/api/v1/chunks/test_page_1.pdf/pdf")

    assert resp.status_code == 200
    content_type = resp.headers["content-type"]
    assert "application/pdf" in content_type
|
|
|
|
|
|
def test_get_chunk_pdf_not_found(client):
    """Requesting a chunk file that was never created yields HTTP 404."""
    resp = client.get("/api/v1/chunks/nonexistent.pdf/pdf")
    status = resp.status_code

    assert status == 404
|
|
|
|
|
|
def test_get_chunk_pdf_path_traversal_double_dot(client):
    """A chunk path containing a ``..`` segment must not be served.

    Either 400 or 404 is accepted; the 404 case is attributed to Starlette
    normalization of the request path.
    """
    resp = client.get("/api/v1/chunks/../etc/passwd/pdf")
    status = resp.status_code

    assert status in (400, 404)
|
|
|
|
|
|
def test_get_chunk_pdf_path_traversal_symlink_escape(client):
    """A path that would resolve outside the base directory is rejected.

    The request uses two ``..`` segments; no symlink is created by this test.
    Either 400 or 404 counts as a rejection.
    """
    resp = client.get("/api/v1/chunks/../../etc/passwd/pdf")
    status = resp.status_code

    assert status in (400, 404)
|
|
|
|
|
|
def test_get_chunk_pdf_with_spaces_in_filename(client, tmp_path):
    """A chunk PDF whose filename contains a space is served with HTTP 200."""
    # Write the file into the same "chunks" directory the client fixture creates.
    pdf_path = tmp_path / "chunks" / "NEC4 ACC_page_3.pdf"
    pdf_path.write_bytes(b"%PDF-1.4 fake content")

    # The space is passed unescaped, exactly as in the on-disk filename.
    resp = client.get("/api/v1/chunks/NEC4 ACC_page_3.pdf/pdf")

    assert resp.status_code == 200
    content_type = resp.headers["content-type"]
    assert "application/pdf" in content_type
|