# legco_ai_assistant/backend/app/test/test_phase2_full_transcript.py
#
# (File-viewer listing header: 195 lines, 7.0 KiB, Python)

"""Phase 2 tests: Full transcript endpoint (POST /api/v1/video/{video_id}/transcribe).
Covers:
- Successful transcription after video upload
- 404 for missing video
- ffmpeg audio extraction (mocked subprocess)
- Missing API key error handling
"""
import os
from pathlib import Path
from unittest.mock import AsyncMock, MagicMock, patch, PropertyMock
import pytest
from fastapi import FastAPI
from fastapi.testclient import TestClient
from app.routers.video import router
@pytest.fixture
def video_client(tmp_path, monkeypatch):
    """Provide a ``TestClient`` for the video router and its upload directory.

    Environment variables are pointed at a per-test upload directory, a 50 MB
    size limit, the ``dashscope`` ASR provider and a dummy API key. The
    ``get_settings`` cache is cleared after the overrides are in place so the
    next lookup is not served from a previously cached value.

    Yields:
        tuple: ``(client, upload_dir)`` where ``client`` is a ``TestClient``
        wrapping a fresh ``FastAPI`` app with the video router mounted under
        ``/api/v1`` and ``upload_dir`` is the temporary upload ``Path``.
    """
    upload_dir = tmp_path / "test_uploads"
    upload_dir.mkdir()

    monkeypatch.setenv("VIDEO_UPLOAD_DIR", str(upload_dir))
    monkeypatch.setenv("MAX_VIDEO_SIZE_MB", "50")
    monkeypatch.setenv("ASR_PROVIDER", "dashscope")
    monkeypatch.setenv("DASHSCOPE_API_KEY", "sk-test-key")

    # Imported lazily so the module is first touched after the env overrides.
    from app.core.config import get_settings

    get_settings.cache_clear()

    app = FastAPI()
    app.include_router(router, prefix="/api/v1")

    yield TestClient(app), upload_dir

    # Teardown: drop any settings object cached while the test environment
    # was active so it cannot leak into tests that run afterwards.
    get_settings.cache_clear()
def _upload_video(client, filename="test.mp4", content=b"\x00" * 1024):
    """Upload a video through the API and return its ``video_id``.

    Args:
        client: Test client exposing ``post(url, files=...)``.
        filename: File name reported in the multipart upload.
        content: Raw bytes sent as the file body.

    Returns:
        The ``video_id`` value from the JSON response.

    Raises:
        AssertionError: If the upload does not return HTTP 200; the message
            includes the status code and response body to ease debugging.
    """
    resp = client.post(
        "/api/v1/video/upload",
        files={"file": (filename, content, "video/mp4")},
    )
    assert resp.status_code == 200, (
        f"video upload failed with status {resp.status_code}: {resp.text}"
    )
    return resp.json()["video_id"]
class TestTranscribeSuccess:
    """Happy-path tests for ``POST /api/v1/video/{video_id}/transcribe``.

    ``VideoService.extract_audio`` and the ``OpenAI`` class referenced by
    ``app.services.asr_providers`` are replaced with mocks in every test.
    """

    @patch("app.routers.video.VideoService.extract_audio")
    @patch("app.services.asr_providers.OpenAI")
    def test_transcribe_returns_response(self, mock_openai_cls, mock_extract, video_client):
        """POST transcribe should return FullTranscriptResponse."""
        client, upload_dir = video_client
        video_id = _upload_video(client)

        # Mock extract_audio to return a fake WAV path
        fake_wav = upload_dir / "extracted.wav"
        fake_wav.write_bytes(b"RIFF" + b"\x00" * 100)
        mock_extract.return_value = fake_wav

        # Mock OpenAI client; the canned transcript is Simplified Chinese
        mock_resp = MagicMock()
        mock_resp.choices = [MagicMock()]
        mock_resp.choices[0].message.content = "测试转录结果"
        mock_openai_instance = MagicMock()
        mock_openai_instance.chat.completions.create.return_value = mock_resp
        mock_openai_cls.return_value = mock_openai_instance

        resp = client.post(f"/api/v1/video/{video_id}/transcribe")

        assert resp.status_code == 200
        data = resp.json()
        assert "text" in data
        assert "language" in data
        assert data["language"] == "yue"
        # Text should be traditional Chinese. The previous check tested for
        # the empty string, which is contained in every string and therefore
        # could never fail.
        # NOTE(review): assumes the endpoint converts the Simplified mock
        # output to Traditional characters — confirm against the ASR service.
        assert "測試" in data["text"] or "轉錄" in data["text"]

    @patch("app.routers.video.VideoService.extract_audio")
    @patch("app.services.asr_providers.OpenAI")
    def test_transcribe_custom_language(self, mock_openai_cls, mock_extract, video_client):
        """POST transcribe with language param should pass it through."""
        client, upload_dir = video_client
        video_id = _upload_video(client)

        fake_wav = upload_dir / "extracted.wav"
        fake_wav.write_bytes(b"RIFF" + b"\x00" * 100)
        mock_extract.return_value = fake_wav

        mock_resp = MagicMock()
        mock_resp.choices = [MagicMock()]
        mock_resp.choices[0].message.content = "hello world"
        mock_openai_instance = MagicMock()
        mock_openai_instance.chat.completions.create.return_value = mock_resp
        mock_openai_cls.return_value = mock_openai_instance

        resp = client.post(f"/api/v1/video/{video_id}/transcribe?language=en")

        assert resp.status_code == 200
        assert resp.json()["language"] == "en"
class TestTranscribeMissingVideo:
    """Transcribe requests for video ids that were never uploaded."""

    def test_404_for_unknown_video(self, video_client):
        """An unknown video id on the transcribe route must yield HTTP 404."""
        api, _upload_dir = video_client
        response = api.post("/api/v1/video/nonexistent-video-id/transcribe")
        assert response.status_code == 404
class TestTranscribeExtractsAudio:
    """Tests for ``VideoService.extract_audio`` with the subprocess mocked.

    Both tests are coroutines, so they carry ``@pytest.mark.asyncio``.
    Without the marker, pytest-asyncio in strict mode does not await them
    and their assertions never run.
    """

    @pytest.mark.asyncio
    @patch("app.services.video_service.asyncio.create_subprocess_exec")
    async def test_extract_audio_calls_ffmpeg(self, mock_subprocess, tmp_path):
        """extract_audio should call ffmpeg with correct arguments."""
        from app.services.video_service import VideoService

        # Setup: create a fake video file
        upload_dir = tmp_path / "uploads"
        upload_dir.mkdir()
        video_file = upload_dir / "test-video.mp4"
        video_file.write_bytes(b"fake-video-content")

        service = VideoService(
            upload_dir=str(upload_dir),
            max_size_mb=300,
            supported_formats=[".mp4"],
        )

        # Mock the subprocess: exit code 0 signals a successful ffmpeg run
        mock_proc = AsyncMock()
        mock_proc.returncode = 0
        mock_proc.communicate.return_value = (b"ffmpeg output", b"")
        mock_subprocess.return_value = mock_proc

        result = await service.extract_audio("test-video")

        assert result is not None
        # Verify ffmpeg was called
        mock_subprocess.assert_called_once()
        call_args = mock_subprocess.call_args[0]
        assert call_args[0] == "ffmpeg"
        assert "-i" in call_args

    @pytest.mark.asyncio
    @patch("app.services.video_service.asyncio.create_subprocess_exec")
    async def test_extract_audio_fails_gracefully(self, mock_subprocess, tmp_path):
        """extract_audio should raise on ffmpeg failure."""
        from app.services.video_service import VideoService

        upload_dir = tmp_path / "uploads"
        upload_dir.mkdir()
        video_file = upload_dir / "test-fail.mp4"
        video_file.write_bytes(b"bad-content")

        service = VideoService(
            upload_dir=str(upload_dir),
            max_size_mb=300,
            supported_formats=[".mp4"],
        )

        # Non-zero return code plus stderr output simulates an ffmpeg failure
        mock_proc = AsyncMock()
        mock_proc.returncode = 1
        mock_proc.communicate.return_value = (b"", b"Error: Invalid data")
        mock_subprocess.return_value = mock_proc

        # NOTE(review): the concrete exception type is not visible from this
        # file; narrow ``Exception`` once the service's error type is known.
        with pytest.raises(Exception):
            await service.extract_audio("test-fail")
class TestTranscribeMissingApiKey:
    """Transcribe behaviour when no DashScope API key is configured."""

    def test_missing_api_key_returns_500(self, monkeypatch, tmp_path):
        """Empty DASHSCOPE_API_KEY should return 500 with descriptive message."""
        upload_dir = tmp_path / "uploads"
        upload_dir.mkdir()

        monkeypatch.setenv("VIDEO_UPLOAD_DIR", str(upload_dir))
        monkeypatch.setenv("MAX_VIDEO_SIZE_MB", "50")
        monkeypatch.setenv("ASR_PROVIDER", "dashscope")
        monkeypatch.setenv("DASHSCOPE_API_KEY", "")

        from app.core.config import get_settings

        get_settings.cache_clear()
        try:
            app = FastAPI()
            app.include_router(router, prefix="/api/v1")
            client = TestClient(app)

            # Upload a video first (shared helper asserts the 200 response)
            video_id = _upload_video(client, content=b"\x00" * 512)

            # Try to transcribe
            resp = client.post(f"/api/v1/video/{video_id}/transcribe")

            assert resp.status_code == 500
            detail = resp.json()["detail"]
            assert "DASHSCOPE_API_KEY" in detail or "API key" in detail
        finally:
            # Do not let settings cached with the empty key leak into tests
            # that run after this one.
            get_settings.cache_clear()