"""Acceptance test: Phase 2 end-to-end video → ASR → query flow. Prerequisites: - All Phase 2 services running (uvicorn) - DashScope API key configured in .env (DASHSCOPE_API_KEY) - ChromaDB with test documents ingested - Test video file with known Cantonese audio content - LLM provider configured and accessible """ import os import pytest @pytest.mark.acceptance @pytest.mark.slow def test_e2e_video_transcript_to_rag(): """Upload video → full transcript → RAG query → answer with citations.""" api_key = os.getenv("DASHSCOPE_API_KEY") if not api_key: pytest.skip("DASHSCOPE_API_KEY not configured") llm_key = os.getenv("LLM_API_KEY") if not llm_key: pytest.skip("LLM_API_KEY not configured") test_file = os.getenv("VIDEO_TEST_FILE") if not test_file or not os.path.exists(test_file): pytest.skip("VIDEO_TEST_FILE not configured or file not found") import requests base_url = os.getenv("BACKEND_URL", "http://localhost:8000") with open(test_file, "rb") as f: resp = requests.post( f"{base_url}/api/v1/video/upload", files={"file": (os.path.basename(test_file), f, "video/mp4")}, timeout=60, ) assert resp.status_code == 200, f"Upload failed: {resp.text}" video_id = resp.json()["video_id"] resp = requests.post( f"{base_url}/api/v1/video/{video_id}/transcribe", timeout=300, ) assert resp.status_code == 200, f"Transcribe failed: {resp.text}" transcript = resp.json()["text"] assert len(transcript) > 0, "Transcript should not be empty" resp = requests.post( f"{base_url}/api/v1/query", json={"question": transcript[:200]}, timeout=120, ) assert resp.status_code == 200