# legco_ai_assistant/backend/app/test/test_phase9_cer_wer.py
#
# 84 lines
# 2.8 KiB
# Python

"""Phase 9 tests: CER/WER calculation for transcription accuracy (Sub-Phase 9.2).
Covers:
- CER for identical Chinese text returns 0.0
- CER for single-character substitution
- CER for deletions and insertions
- WER for Chinese text (word-level)
- Mixed Chinese/English text
- Empty reference and empty hypothesis edge cases
- N/A status when reference transcript is missing
"""
import pytest
from app.services.cer_wer import calculate_cer, calculate_wer
class TestCER:
    """Checks on the mapping returned by ``calculate_cer(reference, hypothesis)``.

    Each test reads the ``"cer"`` value and/or the edit-operation counts
    (``"substitutions"``, ``"deletions"``, ``"insertions"``, ``"hits"``)
    from the returned mapping.
    """

    def test_identical_returns_zero(self):
        """Identical strings: zero rate, no edit operations, seven hits."""
        text = "立法會今日討論"
        outcome = calculate_cer(text, text)
        assert outcome["cer"] == 0.0
        for edit_kind in ("substitutions", "deletions", "insertions"):
            assert outcome[edit_kind] == 0
        # The sample text is seven characters long, all of which match.
        assert outcome["hits"] == 7

    def test_single_substitution(self):
        """One differing character counts as one substitution and six hits."""
        reference, hypothesis = "立法會今日討論", "立法會昨日討論"
        outcome = calculate_cer(reference, hypothesis)
        assert outcome["cer"] > 0.0
        assert outcome["substitutions"] == 1
        assert outcome["hits"] == 6

    def test_deletion(self):
        """A hypothesis shorter than the reference reports deletions."""
        reference, hypothesis = "立法會討論議題", "立法會討論"
        outcome = calculate_cer(reference, hypothesis)
        assert outcome["deletions"] >= 1
        assert outcome["cer"] > 0.0

    def test_insertion(self):
        """A hypothesis longer than the reference reports insertions."""
        reference, hypothesis = "立法會討論", "立法會今日討論"
        outcome = calculate_cer(reference, hypothesis)
        assert outcome["insertions"] >= 1
        assert outcome["cer"] > 0.0

    def test_empty_reference(self):
        """An empty reference yields a 0.0 rate and a reference length of 0."""
        outcome = calculate_cer("", "something")
        assert outcome["cer"] == 0.0
        assert outcome["reference_length"] == 0

    def test_empty_hypothesis(self):
        """An empty hypothesis yields a 1.0 rate with three deletions."""
        outcome = calculate_cer("立法會", "")
        assert outcome["cer"] == 1.0
        assert outcome["deletions"] == 3

    def test_both_empty(self):
        """Two empty strings yield a 0.0 rate."""
        outcome = calculate_cer("", "")
        assert outcome["cer"] == 0.0

    def test_returns_all_fields(self):
        """The returned mapping contains every expected key."""
        expected_keys = (
            "cer",
            "reference_length",
            "transcribed_length",
            "substitutions",
            "deletions",
            "insertions",
            "hits",
        )
        text = "立法會討論"
        outcome = calculate_cer(text, text)
        for field in expected_keys:
            assert field in outcome
class TestWER:
    """Checks on the mapping returned by ``calculate_wer(reference, hypothesis)``.

    The inputs are space-separated strings; each test reads the ``"wer"``
    value and/or the ``"substitutions"`` count from the returned mapping.
    """

    def test_identical_returns_zero(self):
        """Identical inputs yield a 0.0 rate."""
        text = "立法會 今日 討論"
        outcome = calculate_wer(text, text)
        assert outcome["wer"] == 0.0

    def test_word_substitution(self):
        """One differing token counts as a single substitution."""
        reference, hypothesis = "立法會 今日 討論", "立法會 昨日 討論"
        outcome = calculate_wer(reference, hypothesis)
        assert outcome["wer"] > 0.0
        assert outcome["substitutions"] == 1

    def test_mixed_cn_en(self):
        """Mixed English/Chinese input with one differing token: one substitution."""
        reference, hypothesis = "LegCo 討論 議題", "LegCo 討論 政策"
        outcome = calculate_wer(reference, hypothesis)
        assert outcome["substitutions"] == 1

    def test_empty_reference(self):
        """An empty reference yields a 0.0 rate."""
        outcome = calculate_wer("", "something")
        assert outcome["wer"] == 0.0

    def test_empty_hypothesis(self):
        """An empty hypothesis yields a 1.0 rate."""
        outcome = calculate_wer("立法會 討論", "")
        assert outcome["wer"] == 1.0