# legco_ai_assistant/backend/app/test/acceptance/test_acceptance_phase4.py
#
# Repository-viewer metadata: 106 lines, 4.0 KiB, Python

"""Acceptance tests: Phase 4 System Audio and Mic Capture with real DashScope ASR.

Prerequisites:
- ChromaDB running (local or Docker)
- .env configured with a valid DASHSCOPE_API_KEY
- SYSTEM_AUDIO_ENABLED=true and MIC_ENABLED=true in .env
- Chrome/Edge browser for system audio capture tests
- Working microphone for mic capture tests

These tests require manual user interaction (browser permission dialogs), so
each test is marked ``skip`` and its docstring lists the manual steps to follow.

Run with: pytest app/test/acceptance/test_acceptance_phase4.py -v -m acceptance
"""
import pytest
@pytest.mark.acceptance
@pytest.mark.slow
@pytest.mark.skip(reason="Requires real browser with getDisplayMedia support")
def test_real_system_audio_capture_connects() -> None:
    """Verify WebSocket endpoint accepts system-audio source with real DashScope ASR.

    This test is unconditionally skipped and has no automated assertions; the
    steps below are a checklist to be carried out by hand.

    Manual steps:
        1. Start backend with SYSTEM_AUDIO_ENABLED=true and valid DASHSCOPE_API_KEY
        2. Open frontend in Chrome/Edge
        3. Click 'System Audio' capture button
        4. Browser prompts for screen/tab share — select a tab playing audio
        5. Verify WebSocket connects at /ws/asr/{video_id}?source=system-audio
        6. Verify ASR transcript events appear in real time
        7. Verify no 'System audio capture is disabled' error
    """
@pytest.mark.acceptance
@pytest.mark.slow
@pytest.mark.skip(reason="Requires real microphone and browser permission")
def test_real_mic_capture_connects() -> None:
    """Verify WebSocket endpoint accepts mic source with real DashScope ASR.

    This test is unconditionally skipped and has no automated assertions; the
    steps below are a checklist to be carried out by hand.

    Manual steps:
        1. Start backend with MIC_ENABLED=true and valid DASHSCOPE_API_KEY
        2. Open frontend in Chrome/Edge
        3. Click 'Microphone' capture button
        4. Browser prompts for microphone permission — grant it
        5. Verify WebSocket connects at /ws/asr/{video_id}?source=mic
        6. Speak into microphone and verify ASR transcript events appear
        7. Verify no 'Microphone capture is disabled' error
    """
@pytest.mark.acceptance
@pytest.mark.slow
@pytest.mark.skip(reason="Requires real DashScope ASR and browser audio capture")
def test_real_dashscope_asr_with_system_audio() -> None:
    """Real DashScope ASR processes system audio PCM stream.

    This test is unconditionally skipped and has no automated assertions; the
    steps below are a checklist to be carried out by hand.

    Manual steps:
        1. Start backend with SYSTEM_AUDIO_ENABLED=true
        2. Play a Cantonese audio/video clip in a browser tab
        3. Start system audio capture targeting that tab
        4. Verify DashScope OmniRealtimeConversation receives PCM chunks
        5. Verify transcript sentences appear with sentence-begin/sentence-end events
        6. Verify accumulated transcript text is non-empty
        7. Check backend logs for 'dashscope-session-closed' with text_len > 0
    """
@pytest.mark.acceptance
@pytest.mark.slow
@pytest.mark.skip(reason="Requires real DashScope ASR and microphone")
def test_real_dashscope_asr_with_microphone() -> None:
    """Real DashScope ASR processes microphone PCM stream.

    This test is unconditionally skipped and has no automated assertions; the
    steps below are a checklist to be carried out by hand.

    Manual steps:
        1. Start backend with MIC_ENABLED=true
        2. Start microphone capture in frontend
        3. Speak a clear Cantonese sentence (e.g. '今日天氣好好')
        4. Verify DashScope returns transcription results
        5. Verify transcript text matches spoken content (allow partial match)
        6. Verify Traditional Chinese conversion applied (if _to_traditional active)
        7. Check backend logs for 'dashscope-session-closed' with text_len > 0
    """
@pytest.mark.acceptance
@pytest.mark.slow
@pytest.mark.skip(reason="Full end-to-end requires browser + ASR + LLM")
def test_end_to_end_capture_to_rag() -> None:
    """End-to-end: capture system audio -> ASR transcript -> RAG query.

    This test is unconditionally skipped and has no automated assertions; the
    steps below are a checklist to be carried out by hand.

    Manual steps:
        1. Ingest relevant documents via /api/v1/ingest
        2. Start backend with SYSTEM_AUDIO_ENABLED=true, valid DASHSCOPE_API_KEY,
           and valid LLM_BASE_URL/LLM_API_KEY
        3. Open frontend, start system audio capture on a tab with relevant audio
        4. Let ASR produce a transcript in the query input
        5. Click 'Ask' to trigger RAG query
        6. Verify SSE stream: decomposed -> retrieving -> filtering -> generating -> completed
        7. Verify final answer contains bullet points with source citations
        8. Verify sources reference ingested documents
    """