106 lines
4.0 KiB
Python
106 lines
4.0 KiB
Python
"""Acceptance tests: Phase 4 System Audio and Mic Capture with real DashScope ASR.

Prerequisites:
- ChromaDB running (local or Docker)
- .env configured with valid DASHSCOPE_API_KEY
- SYSTEM_AUDIO_ENABLED=true and MIC_ENABLED=true in .env
- Chrome/Edge browser for system audio capture tests
- Working microphone for mic capture tests

These tests require manual user interaction (browser permission dialogs).
Run with: pytest app/test/acceptance/test_acceptance_phase4.py -v -m acceptance
"""
|
|
import pytest
|
|
|
|
|
|
@pytest.mark.acceptance
@pytest.mark.slow
@pytest.mark.skip(reason="Requires real browser with getDisplayMedia support")
def test_real_system_audio_capture_connects():
    """Manual check: the ASR WebSocket accepts the system-audio source.

    Exercised against real DashScope ASR. The test is unconditionally
    skipped and has no executable body; the steps below are a checklist
    for a human tester.

    Manual steps:
    1. Launch the backend with SYSTEM_AUDIO_ENABLED=true and a valid
       DASHSCOPE_API_KEY.
    2. Open the frontend in Chrome or Edge.
    3. Press the 'System Audio' capture button.
    4. When the browser asks for a screen/tab to share, pick a tab that
       is playing audio.
    5. Confirm the WebSocket connects at
       /ws/asr/{video_id}?source=system-audio.
    6. Confirm ASR transcript events show up in real time.
    7. Confirm no 'System audio capture is disabled' error is reported.
    """
|
|
|
|
|
|
@pytest.mark.acceptance
@pytest.mark.slow
@pytest.mark.skip(reason="Requires real microphone and browser permission")
def test_real_mic_capture_connects():
    """Manual check: the ASR WebSocket accepts the mic source.

    Exercised against real DashScope ASR. The test is unconditionally
    skipped and has no executable body; the steps below are a checklist
    for a human tester.

    Manual steps:
    1. Launch the backend with MIC_ENABLED=true and a valid
       DASHSCOPE_API_KEY.
    2. Open the frontend in Chrome or Edge.
    3. Press the 'Microphone' capture button.
    4. When the browser asks for microphone permission, grant it.
    5. Confirm the WebSocket connects at /ws/asr/{video_id}?source=mic.
    6. Speak into the microphone and confirm ASR transcript events appear.
    7. Confirm no 'Microphone capture is disabled' error is reported.
    """
|
|
|
|
|
|
@pytest.mark.acceptance
@pytest.mark.slow
@pytest.mark.skip(reason="Requires real DashScope ASR and browser audio capture")
def test_real_dashscope_asr_with_system_audio():
    """Manual check: real DashScope ASR handles a system-audio PCM stream.

    The test is unconditionally skipped and has no executable body; the
    steps below are a checklist for a human tester.

    Manual steps:
    1. Launch the backend with SYSTEM_AUDIO_ENABLED=true.
    2. Play a Cantonese audio/video clip in a browser tab.
    3. Start system audio capture and target that tab.
    4. Confirm DashScope OmniRealtimeConversation receives PCM chunks.
    5. Confirm transcript sentences arrive with sentence-begin and
       sentence-end events.
    6. Confirm the accumulated transcript text is non-empty.
    7. Look in the backend logs for 'dashscope-session-closed' with
       text_len > 0.
    """
|
|
|
|
|
|
@pytest.mark.acceptance
@pytest.mark.slow
@pytest.mark.skip(reason="Requires real DashScope ASR and microphone")
def test_real_dashscope_asr_with_microphone():
    """Manual check: real DashScope ASR handles a microphone PCM stream.

    The test is unconditionally skipped and has no executable body; the
    steps below are a checklist for a human tester.

    Manual steps:
    1. Launch the backend with MIC_ENABLED=true.
    2. Start microphone capture from the frontend.
    3. Say a clear Cantonese sentence (e.g. '今日天氣好好').
    4. Confirm DashScope returns transcription results.
    5. Confirm the transcript text matches what was spoken (a partial
       match is acceptable).
    6. Confirm Traditional Chinese conversion was applied (if
       _to_traditional is active).
    7. Look in the backend logs for 'dashscope-session-closed' with
       text_len > 0.
    """
|
|
|
|
|
|
@pytest.mark.acceptance
@pytest.mark.slow
@pytest.mark.skip(reason="Full end-to-end requires browser + ASR + LLM")
def test_end_to_end_capture_to_rag():
    """Manual end-to-end check: system audio -> ASR transcript -> RAG query.

    The test is unconditionally skipped and has no executable body; the
    steps below are a checklist for a human tester.

    Manual steps:
    1. Ingest relevant documents through /api/v1/ingest.
    2. Launch the backend with SYSTEM_AUDIO_ENABLED=true, a valid
       DASHSCOPE_API_KEY, and valid LLM_BASE_URL/LLM_API_KEY.
    3. Open the frontend and start system audio capture on a tab playing
       relevant audio.
    4. Wait for ASR to produce a transcript in the query input.
    5. Click 'Ask' to trigger the RAG query.
    6. Confirm the SSE stream progresses:
       decomposed -> retrieving -> filtering -> generating -> completed.
    7. Confirm the final answer contains bullet points with source
       citations.
    8. Confirm the sources reference the ingested documents.
    """
|