# legco_ai_assistant/backend/app/test/test_phase3_youtube_extract.py

# 478 lines
# 19 KiB
# Python

"""Phase 3.2 tests: YouTube URL extraction via yt-dlp.
Covers:
- POST /api/v1/youtube/extract — VOD, live, upcoming, invalid URL
- Format selection: video-only ≤480p, best audio, HLS preference
- URL caching: in-memory with TTL, expiry triggers re-extract
- Proxy URL construction: upstream URL encoded in query param
- Error handling: DownloadError → 200 with `error` field set, PO-token error → cache invalidated, timeout → 504, disabled → 503
All yt-dlp external calls are mocked.
"""
import time
from unittest.mock import MagicMock, patch
import pytest
from fastapi import FastAPI
from fastapi.testclient import TestClient
# ---------------------------------------------------------------------------
# Helpers — fake yt-dlp format data
# ---------------------------------------------------------------------------
def _make_format(
format_id: str,
height: int | None = None,
vcodec: str = "none",
acodec: str = "none",
ext: str = "mp4",
protocol: str = "https",
url: str = "",
abr: float | None = None,
tbr: float | None = None,
resolution: str | None = None,
) -> dict:
return {
"format_id": format_id,
"height": height,
"width": height * 16 // 9 if height else None,
"vcodec": vcodec,
"acodec": acodec,
"ext": ext,
"protocol": protocol,
"url": url or f"https://example.com/{format_id}.{ext}",
"abr": abr,
"tbr": tbr,
"resolution": resolution or (f"{height * 16 // 9}x{height}" if height else None),
}
def _vod_info(video_id: str = "abc123") -> dict:
    """Return a fake yt-dlp info dict for a regular (non-live) video.

    The format list holds five video-only AVC entries (1080p, 720p, 480p,
    360p, 240p), two audio-only entries (``mp4a`` with abr=128 and ``opus``
    with abr=160) and one combined 360p entry (format ``18``).

    Args:
        video_id: Value stored under ``"id"`` and embedded in the thumbnail URL.

    Returns:
        A dict with ``id``, ``title``, ``thumbnail``, ``live_status``
        (``"not_live"``), ``duration`` (300) and ``formats``.
    """
    return {
        "id": video_id,
        "title": "Test VOD Video",
        # Derived from video_id so the thumbnail never disagrees with "id"
        # when a non-default id is requested.
        "thumbnail": f"https://i.ytimg.com/vi/{video_id}/hqdefault.jpg",
        "live_status": "not_live",
        "duration": 300,
        "formats": [
            _make_format("137", height=1080, vcodec="avc1.640028", acodec="none", tbr=5000),
            _make_format("136", height=720, vcodec="avc1.640028", acodec="none", tbr=2500),
            _make_format("135", height=480, vcodec="avc1.640028", acodec="none", tbr=1200),
            _make_format("134", height=360, vcodec="avc1.640028", acodec="none", tbr=600),
            _make_format("133", height=240, vcodec="avc1.640028", acodec="none", tbr=300),
            _make_format("140", acodec="mp4a.40.2", vcodec="none", abr=128),
            _make_format("251", acodec="opus", vcodec="none", abr=160),
            _make_format("18", height=360, vcodec="avc1.42001E", acodec="mp4a.40.2", tbr=500),
        ],
    }
def _vod_info_hls(video_id: str = "abc123") -> dict:
    """Return a fake yt-dlp info dict for a non-live video served over HLS.

    Every format uses ``ext="m3u8"`` and ``protocol="m3u8_native"``: two
    video-only entries (720p, 480p) and one audio-only entry.

    Args:
        video_id: Value stored under ``"id"`` and embedded in the thumbnail URL.

    Returns:
        A dict with ``id``, ``title``, ``thumbnail``, ``live_status``
        (``"not_live"``), ``duration`` (600) and ``formats``.
    """
    return {
        "id": video_id,
        "title": "Test VOD with HLS",
        # Derived from video_id so the thumbnail never disagrees with "id"
        # when a non-default id is requested.
        "thumbnail": f"https://i.ytimg.com/vi/{video_id}/hqdefault.jpg",
        "live_status": "not_live",
        "duration": 600,
        "formats": [
            _make_format("136", height=720, vcodec="avc1.640028", acodec="none", ext="m3u8", protocol="m3u8_native", tbr=2500),
            _make_format("135", height=480, vcodec="avc1.640028", acodec="none", ext="m3u8", protocol="m3u8_native", tbr=1200),
            _make_format("140", acodec="mp4a.40.2", vcodec="none", ext="m3u8", protocol="m3u8_native", abr=128),
        ],
    }
def _live_info(video_id: str = "live999") -> dict:
    """Return a fake yt-dlp info dict for a stream that is currently live.

    All five formats (144p to 720p) are combined audio+video entries using
    ``protocol="m3u8_native"``; there are no video-only or audio-only entries.

    Args:
        video_id: Value stored under ``"id"`` and embedded in the thumbnail URL.

    Returns:
        A dict with ``id``, ``title``, ``thumbnail``, ``live_status``
        (``"is_live"``), ``duration`` (``None``) and ``formats``.
    """
    return {
        "id": video_id,
        "title": "Live Stream Test",
        # Derived from video_id so the thumbnail never disagrees with "id"
        # (this helper is also called with ids other than the default).
        "thumbnail": f"https://i.ytimg.com/vi/{video_id}/hqdefault_live.jpg",
        "live_status": "is_live",
        "duration": None,
        "formats": [
            _make_format("91", height=144, vcodec="avc1.42C00B", acodec="mp4a.40.5", ext="mp4", protocol="m3u8_native"),
            _make_format("92", height=240, vcodec="avc1.4D4015", acodec="mp4a.40.5", ext="mp4", protocol="m3u8_native"),
            _make_format("93", height=360, vcodec="avc1.4D401E", acodec="mp4a.40.2", ext="mp4", protocol="m3u8_native"),
            _make_format("94", height=480, vcodec="avc1.4D401F", acodec="mp4a.40.2", ext="mp4", protocol="m3u8_native", tbr=1200),
            _make_format("95", height=720, vcodec="avc1.4D401F", acodec="mp4a.40.2", ext="mp4", protocol="m3u8_native"),
        ],
    }
def _upcoming_info(video_id: str = "up999") -> dict:
    """Return a fake yt-dlp info dict for a scheduled stream that has not started.

    Args:
        video_id: Value stored under ``"id"`` and embedded in the thumbnail URL.

    Returns:
        A dict with ``id``, ``title``, ``thumbnail``, ``live_status``
        (``"is_upcoming"``), ``duration`` (``None``) and an empty ``formats``
        list.
    """
    return {
        "id": video_id,
        "title": "Upcoming Stream",
        # Derived from video_id so the thumbnail never disagrees with "id"
        # when a non-default id is requested.
        "thumbnail": f"https://i.ytimg.com/vi/{video_id}/hqdefault.jpg",
        "live_status": "is_upcoming",
        "duration": None,
        "formats": [],
    }
def _private_info(video_id: str = "priv99") -> dict:
    """Simulate extracting a private video: always raises, never returns.

    Args:
        video_id: Accepted but not used by the body.

    Raises:
        yt_dlp.utils.DownloadError: Always, with a "Private video" message.
    """
    from yt_dlp.utils import DownloadError

    message = "Private video. Sign in if you've been granted access to this video"
    raise DownloadError(message)
# ---------------------------------------------------------------------------
# Mock helpers
# ---------------------------------------------------------------------------
def _make_mock_ydl(return_value: dict | Exception) -> MagicMock:
"""Build a mock yt_dlp.YoutubeDL context manager with .extract_info."""
mock_instance = MagicMock()
if isinstance(return_value, Exception):
mock_instance.extract_info.side_effect = return_value
else:
mock_instance.extract_info.return_value = return_value
mock_ydl = MagicMock()
mock_ydl.__enter__.return_value = mock_instance
mock_ydl.__exit__.return_value = None
return mock_ydl
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
@pytest.fixture
def youtube_client(monkeypatch):
    """Yield a TestClient for a bare FastAPI app with the youtube router mounted.

    The router is included under the ``/api/v1`` prefix and
    ``YOUTUBE_PROXY_ENABLED`` is set to ``"true"`` for the duration of the test.
    The ``get_settings`` cache is cleared after the environment change so the
    next ``get_settings()`` call is recomputed, and cleared again on teardown
    so settings cached during this test do not leak into later tests.
    """
    from app.routers.youtube import router
    from app.core.config import get_settings

    monkeypatch.setenv("YOUTUBE_PROXY_ENABLED", "true")
    # Clear only after the env var is in place; an earlier clear would be
    # redundant because this one discards any cached value anyway.
    get_settings.cache_clear()

    app = FastAPI()
    app.include_router(router, prefix="/api/v1")
    yield TestClient(app)

    # Teardown runs before monkeypatch restores the environment; emptying the
    # cache here forces the next caller to recompute settings afterwards.
    get_settings.cache_clear()
# ---------------------------------------------------------------------------
# Unit: Format selection
# ---------------------------------------------------------------------------
class TestFormatSelection:
    """Unit tests for ``YouTubeService._select_best_formats``."""

    @staticmethod
    def _service():
        """Return a fresh service instance (timeout=30, cache_ttl=300)."""
        from app.services.youtube_service import YouTubeService

        return YouTubeService(timeout=30, cache_ttl=300)

    def test_selects_best_video_at_or_under_480p(self):
        """The 480p video-only stream and an audio-only stream are chosen."""
        service = self._service()
        candidates = _vod_info()["formats"]
        video, audio = service._select_best_formats(candidates)
        assert video is not None
        assert audio is not None
        assert video["height"] == 480
        assert video["vcodec"] != "none"
        assert video["acodec"] == "none"
        assert audio["acodec"] != "none"
        assert audio["vcodec"] == "none"

    def test_falls_back_to_lowest_video_if_no_480p(self):
        """With only 1080p and 720p available, the 720p stream is chosen."""
        service = self._service()
        candidates = [
            _make_format("137", height=1080, vcodec="avc1", acodec="none", tbr=5000),
            _make_format("136", height=720, vcodec="avc1", acodec="none", tbr=2500),
            _make_format("140", acodec="mp4a", vcodec="none", abr=128),
        ]
        video, audio = service._select_best_formats(candidates)
        assert video is not None
        # Nothing at or below 480p exists, so the lowest available is expected.
        assert video["height"] == 720

    def test_selects_highest_bitrate_audio(self):
        """Among audio-only streams, the one with the highest abr wins."""
        service = self._service()
        candidates = [
            _make_format("137", height=480, vcodec="avc1", acodec="none", tbr=1200),
            _make_format("140", acodec="mp4a", vcodec="none", abr=128),
            _make_format("251", acodec="opus", vcodec="none", abr=160),
            _make_format("250", acodec="opus", vcodec="none", abr=64),
        ]
        _, audio = service._select_best_formats(candidates)
        assert audio is not None
        assert audio["format_id"] == "251"  # abr=160 is the highest

    def test_no_formats_raises(self):
        """An empty format list raises ValueError."""
        service = self._service()
        with pytest.raises(ValueError, match="No streamable formats"):
            service._select_best_formats([])

    def test_no_video_only_formats_falls_back_to_combined(self):
        """Without a video-only stream, the combined stream is used as video."""
        service = self._service()
        candidates = [
            _make_format("18", height=360, vcodec="avc1", acodec="mp4a", tbr=500),
            _make_format("140", acodec="mp4a", vcodec="none", abr=128),
        ]
        video, audio = service._select_best_formats(candidates)
        assert video is not None
        assert video["format_id"] == "18"
        assert audio is not None

    def test_hls_preference_for_live(self):
        """Given equivalent https and HLS video streams, HLS is chosen."""
        service = self._service()
        candidates = [
            _make_format("135", height=480, vcodec="avc1", acodec="none", ext="mp4", protocol="https", tbr=1200),
            _make_format("301", height=480, vcodec="avc1", acodec="none", ext="m3u8", protocol="m3u8_native", tbr=1200),
            _make_format("140", acodec="mp4a", vcodec="none", ext="m3u8", protocol="m3u8_native", abr=128),
        ]
        video, audio = service._select_best_formats(candidates)
        assert video["protocol"] == "m3u8_native"
        assert audio["protocol"] == "m3u8_native"

    def test_combined_only_all_combined_formats(self):
        """When every stream is combined, video and audio are the same 480p entry."""
        service = self._service()
        candidates = [
            _make_format("93", height=360, vcodec="avc1", acodec="mp4a", ext="mp4", protocol="m3u8_native"),
            _make_format("94", height=480, vcodec="avc1", acodec="mp4a", ext="mp4", protocol="m3u8_native"),
            _make_format("95", height=720, vcodec="avc1", acodec="mp4a", ext="mp4", protocol="m3u8_native"),
            _make_format("96", height=1080, vcodec="avc1", acodec="mp4a", ext="mp4", protocol="m3u8_native"),
        ]
        video, audio = service._select_best_formats(candidates)
        assert video["height"] == 480
        assert audio["height"] == 480
        assert video["url"] == audio["url"]
# ---------------------------------------------------------------------------
# Integration: Route + mocked yt-dlp
# ---------------------------------------------------------------------------
class TestYouTubeExtractVOD:
    """Route tests for ``POST /api/v1/youtube/extract`` with a non-live video."""

    _YDL_TARGET = "app.services.youtube_service.yt_dlp.YoutubeDL"
    _ENDPOINT = "/api/v1/youtube/extract"

    def test_extract_vod_returns_proxy_urls(self, youtube_client):
        """A VOD yields metadata plus non-null proxy URLs for video and audio."""
        fake_ydl = _make_mock_ydl(_vod_info("abc123"))
        payload = {"url": "https://www.youtube.com/watch?v=abc123"}
        with patch(self._YDL_TARGET, return_value=fake_ydl):
            response = youtube_client.post(self._ENDPOINT, json=payload)
        assert response.status_code == 200
        body = response.json()
        assert body["video_id"] == "abc123"
        assert body["title"] == "Test VOD Video"
        assert body["is_live"] is False
        assert body["is_upcoming"] is False
        assert body["video_proxy_url"] is not None
        assert body["audio_proxy_url"] is not None
        assert body["video_proxy_url"].startswith("/api/v1/youtube/proxy/")
        assert body["thumbnail_url"] == "https://i.ytimg.com/vi/abc123/hqdefault.jpg"
        assert len(body["formats"]) > 0

    def test_extract_vod_hls_returns_manifest_proxy_urls(self, youtube_client):
        """HLS-only formats produce manifest-style proxy URLs for both tracks."""
        fake_ydl = _make_mock_ydl(_vod_info_hls("abc123"))
        payload = {"url": "https://www.youtube.com/watch?v=abc123"}
        with patch(self._YDL_TARGET, return_value=fake_ydl):
            response = youtube_client.post(self._ENDPOINT, json=payload)
        assert response.status_code == 200
        body = response.json()
        for field in ("video_proxy_url", "audio_proxy_url"):
            assert "manifest.m3u8?url=" in body[field]

    def test_error_field_is_none_on_success(self, youtube_client):
        """A successful extraction reports ``error`` as null."""
        fake_ydl = _make_mock_ydl(_vod_info())
        payload = {"url": "https://www.youtube.com/watch?v=abc123"}
        with patch(self._YDL_TARGET, return_value=fake_ydl):
            response = youtube_client.post(self._ENDPOINT, json=payload)
        assert response.status_code == 200
        assert response.json()["error"] is None
class TestYouTubeExtractLive:
    """Route tests for ``POST /api/v1/youtube/extract`` with a live stream."""

    _YDL_TARGET = "app.services.youtube_service.yt_dlp.YoutubeDL"
    _ENDPOINT = "/api/v1/youtube/extract"

    def test_extract_live_returns_is_live_true(self, youtube_client):
        """A live stream is flagged as live and still gets both proxy URLs."""
        fake_ydl = _make_mock_ydl(_live_info())
        payload = {"url": "https://www.youtube.com/watch?v=live999"}
        with patch(self._YDL_TARGET, return_value=fake_ydl):
            response = youtube_client.post(self._ENDPOINT, json=payload)
        assert response.status_code == 200
        body = response.json()
        assert body["video_id"] == "live999"
        assert body["is_live"] is True
        assert body["is_upcoming"] is False
        assert body["video_proxy_url"] is not None
        assert body["audio_proxy_url"] is not None

    def test_live_combined_format_same_url_for_both(self, youtube_client):
        """With only combined formats, video and audio share one proxy URL."""
        fake_ydl = _make_mock_ydl(_live_info("combined_test"))
        payload = {"url": "https://www.youtube.com/watch?v=combined_test"}
        with patch(self._YDL_TARGET, return_value=fake_ydl):
            response = youtube_client.post(self._ENDPOINT, json=payload)
        assert response.status_code == 200
        body = response.json()
        assert body["is_live"] is True
        assert body["video_proxy_url"] == body["audio_proxy_url"]
class TestYouTubeExtractUpcoming:
    """Route tests for ``POST /api/v1/youtube/extract`` with a scheduled stream."""

    def test_extract_upcoming_returns_is_upcoming_true(self, youtube_client):
        """An upcoming stream is flagged as such and carries no proxy URLs."""
        fake_ydl = _make_mock_ydl(_upcoming_info())
        payload = {"url": "https://www.youtube.com/watch?v=up999"}
        with patch("app.services.youtube_service.yt_dlp.YoutubeDL", return_value=fake_ydl):
            response = youtube_client.post("/api/v1/youtube/extract", json=payload)
        assert response.status_code == 200
        body = response.json()
        assert body["video_id"] == "up999"
        assert body["is_upcoming"] is True
        assert body["is_live"] is False
        assert body["video_proxy_url"] is None
        assert body["audio_proxy_url"] is None
class TestYouTubeExtractErrors:
    """Error-path tests: yt-dlp failures, PO-token detection, disabled proxy."""

    def test_private_video_returns_error_field(self, youtube_client):
        """A DownloadError is reported via the ``error`` field of a 200 response."""
        import yt_dlp

        exc = yt_dlp.utils.DownloadError("Private video")
        mock_ydl = _make_mock_ydl(exc)
        with patch("app.services.youtube_service.yt_dlp.YoutubeDL", return_value=mock_ydl):
            resp = youtube_client.post(
                "/api/v1/youtube/extract",
                json={"url": "https://www.youtube.com/watch?v=priv99"},
            )
        assert resp.status_code == 200
        data = resp.json()
        assert data["error"] is not None
        assert "Private video" in data["error"]

    def test_po_token_error_invalidates_cache(self):
        """A bot-check failure removes the URL's existing cache entry."""
        import asyncio

        import yt_dlp
        from app.services.youtube_service import YouTubeService

        svc = YouTubeService(timeout=30, cache_ttl=300)
        url = "https://www.youtube.com/watch?v=potest"
        # Seed the cache with an entry for this URL.
        # NOTE(review): the 100.0 timestamp presumably makes the entry stale so
        # extraction is attempted rather than served from cache — confirm
        # against the clock the service uses.
        svc._cache[url] = (100.0, {"video_id": "cached", "title": "Cached"})
        # Make yt-dlp fail with the bot-check message.
        exc = yt_dlp.utils.DownloadError("Sign in to confirm you're not a bot")
        mock_ydl = _make_mock_ydl(exc)
        with patch("app.services.youtube_service.yt_dlp.YoutubeDL", return_value=mock_ydl):
            # asyncio.run closes the loop it creates; a bare new_event_loop()
            # would be left open and emit a ResourceWarning.
            result = asyncio.run(svc.extract_streams(url))
        assert result["error"] is not None
        assert "not a bot" in result["error"]
        # The entry must be gone so the next extract re-attempts.
        assert url not in svc._cache

    def test_is_po_token_error_detection(self):
        """``_is_po_token_error`` matches bot-check messages and nothing else here."""
        from app.services.youtube_service import _is_po_token_error

        assert _is_po_token_error("Sign in to confirm you're not a bot")
        assert _is_po_token_error("ERROR: [youtube] PO Token expired")
        assert _is_po_token_error("bot detection triggered for this request")
        assert not _is_po_token_error("Video unavailable")
        assert not _is_po_token_error("Private video")

    def test_disabled_proxy_returns_503(self, monkeypatch, youtube_client):
        """With the proxy disabled via env, the extract route answers 503."""
        from app.core.config import get_settings

        # Override the value set by the youtube_client fixture, then drop the
        # cached settings so the new value is picked up.
        monkeypatch.setenv("YOUTUBE_PROXY_ENABLED", "false")
        get_settings.cache_clear()
        resp = youtube_client.post(
            "/api/v1/youtube/extract",
            json={"url": "https://www.youtube.com/watch?v=abc123"},
        )
        assert resp.status_code == 503
class TestURLCaching:
    """Tests that extraction results are cached and that the TTL is honoured."""

    def test_cached_result_not_re_extracted(self, youtube_client):
        """Two requests for the same URL invoke yt-dlp's extract_info once."""
        fake_ydl = _make_mock_ydl(_vod_info("cached1"))
        extractor = fake_ydl.__enter__.return_value
        payload = {"url": "https://www.youtube.com/watch?v=cached1"}
        with patch("app.services.youtube_service.yt_dlp.YoutubeDL", return_value=fake_ydl):
            first = youtube_client.post("/api/v1/youtube/extract", json=payload)
            second = youtube_client.post("/api/v1/youtube/extract", json=payload)
        assert first.status_code == 200
        assert second.status_code == 200
        assert first.json()["video_id"] == second.json()["video_id"]
        # The second request must have been served from the cache.
        assert extractor.extract_info.call_count == 1

    def test_cache_expiry_triggers_re_extract(self, monkeypatch):
        """With cache_ttl=0, a second call for the same URL extracts again."""
        import asyncio

        from app.services.youtube_service import YouTubeService

        # TTL of 0 is used so the entry written by the first call is already
        # expired when the second call looks it up.
        service = YouTubeService(timeout=30, cache_ttl=0)
        fake_ydl = _make_mock_ydl(_vod_info("exp1"))
        extractor = fake_ydl.__enter__.return_value
        with patch("app.services.youtube_service.yt_dlp.YoutubeDL", return_value=fake_ydl):
            for _ in range(2):
                asyncio.run(service.extract_streams("https://www.youtube.com/watch?v=exp1"))
        assert extractor.extract_info.call_count == 2
class TestProxyURLConstruction:
    """Tests for ``YouTubeService._build_proxy_url``."""

    def test_proxy_url_encodes_upstream_url(self):
        """The upstream URL is carried, percent-encoded, in the ``url`` parameter."""
        from urllib.parse import unquote

        from app.services.youtube_service import YouTubeService

        svc = YouTubeService(timeout=30, cache_ttl=300)
        upstream = "https://manifest.googlevideo.com/123/hls_playlist.m3u8?id=abc&key=def"
        proxy = svc._build_proxy_url(upstream)
        assert proxy.startswith("/api/v1/youtube/proxy/manifest.m3u8?url=")
        # Everything after the first "url=" is the encoded upstream URL.
        encoded = proxy.split("url=", 1)[1]
        # A raw "&" would end the `url` parameter early when the proxy URL is
        # parsed. The round-trip check below cannot catch that on its own,
        # because unquote() leaves an unencoded URL unchanged.
        assert "&" not in encoded
        decoded = unquote(encoded)
        assert decoded == upstream