"""Phase 9 tests: CER/WER calculation for transcription accuracy (Sub-Phase 9.2). Covers: - CER for identical Chinese text returns 0.0 - CER for single-character substitution - CER for deletions and insertions - WER for Chinese text (word-level) - Mixed Chinese/English text - Empty reference and empty hypothesis edge cases - N/A status when reference transcript is missing """ import pytest from app.services.cer_wer import calculate_cer, calculate_wer class TestCER: def test_identical_returns_zero(self): result = calculate_cer("立法會今日討論", "立法會今日討論") assert result["cer"] == 0.0 assert result["substitutions"] == 0 assert result["deletions"] == 0 assert result["insertions"] == 0 assert result["hits"] == 7 def test_single_substitution(self): result = calculate_cer("立法會今日討論", "立法會昨日討論") assert result["cer"] > 0.0 assert result["substitutions"] == 1 assert result["hits"] == 6 def test_deletion(self): result = calculate_cer("立法會討論議題", "立法會討論") assert result["deletions"] >= 1 assert result["cer"] > 0.0 def test_insertion(self): result = calculate_cer("立法會討論", "立法會今日討論") assert result["insertions"] >= 1 assert result["cer"] > 0.0 def test_empty_reference(self): result = calculate_cer("", "something") assert result["cer"] == 0.0 assert result["reference_length"] == 0 def test_empty_hypothesis(self): result = calculate_cer("立法會", "") assert result["cer"] == 1.0 assert result["deletions"] == 3 def test_both_empty(self): result = calculate_cer("", "") assert result["cer"] == 0.0 def test_returns_all_fields(self): result = calculate_cer("立法會討論", "立法會討論") for key in ("cer", "reference_length", "transcribed_length", "substitutions", "deletions", "insertions", "hits"): assert key in result class TestWER: def test_identical_returns_zero(self): result = calculate_wer("立法會 今日 討論", "立法會 今日 討論") assert result["wer"] == 0.0 def test_word_substitution(self): result = calculate_wer("立法會 今日 討論", "立法會 昨日 討論") assert result["wer"] > 0.0 assert result["substitutions"] == 1 def test_mixed_cn_en(self): result = calculate_wer("LegCo 討論 議題", "LegCo 討論 政策") assert result["substitutions"] == 1 def test_empty_reference(self): result = calculate_wer("", "something") assert result["wer"] == 0.0 def test_empty_hypothesis(self): result = calculate_wer("立法會 討論", "") assert result["wer"] == 1.0