"""Acceptance tests for Phase 8 Q&A-pair chunking against real LTT PDFs.

Prerequisites:
- A local ChromaDB instance is running.
- .env provides valid LLM_BASE_URL, LLM_API_KEY and LLM_MODEL_NAME values.
- The test PDFs are available in ../../test materials/LTT/.

The tests need real LLM calls and actual LegCo PDFs, so they are meant to be
run manually:

    pytest app/test/acceptance/test_acceptance_phase8_qa_chunking.py -v -m acceptance

NOTE: every test in this module is currently an unconditionally skipped
placeholder; each docstring records the outcome the test is meant to verify.
"""
import os
import sys

import pytest

# Shared skip markers: one marker object per distinct reason, so the reason
# text is written once instead of being repeated on every test.
_NEEDS_LLM_AND_PDFS = pytest.mark.skip(
    reason="Requires real LLM API and test PDFs in test materials/LTT/"
)
_NEEDS_FULL_PIPELINE = pytest.mark.skip(
    reason="Requires full pipeline with LLM, embeddings, ChromaDB"
)


@pytest.mark.acceptance
@pytest.mark.slow
class TestRealQaChunking:
    """Q&A chunking end to end, using real LegCo PDFs from test materials/LTT/."""

    @_NEEDS_LLM_AND_PDFS
    def test_real_qa_chunking_fileE(self):
        """File E: expect 12 Chinese Q&A pairs, 3 Others and narrative sections."""

    @_NEEDS_LLM_AND_PDFS
    def test_real_qa_chunking_fileL(self):
        """File L: expect 24 English Q&A pairs and narrative sections."""

    @_NEEDS_LLM_AND_PDFS
    def test_real_qa_chunking_fileB(self):
        """File B: expect 3 Chinese Q&A pairs and narrative sections."""

    @_NEEDS_LLM_AND_PDFS
    def test_real_qa_chunking_fileA(self):
        """File A: no Q&A present, so narrative chunking is used without error."""

    @_NEEDS_LLM_AND_PDFS
    def test_table_extraction_fileE(self):
        """File E: tables inside answers are converted to markdown."""

    @_NEEDS_LLM_AND_PDFS
    def test_table_extraction_fileL(self):
        """File L: tables inside answers are converted to markdown."""

    @_NEEDS_LLM_AND_PDFS
    def test_qa_page_references(self):
        """The page number of each Q&A chunk points at its question (問) location."""

    @_NEEDS_FULL_PIPELINE
    def test_full_pipeline_question_strategy(self):
        """Full ingest -> retrieve -> query pipeline run over Q&A chunks."""