From 226f4ed700e3dcd193a9a39e0bd700a063754a6b Mon Sep 17 00:00:00 2001 From: Woody Date: Mon, 4 May 2026 14:59:23 +0800 Subject: [PATCH] test: update integration mocks for dual-client architecture (Phase 6) Added complete_structured() to mock classes, split response lists between LLMClientDP (decompose) and LLMClient (filter+generate), and patched both clients in all integration tests. Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent) Co-authored-by: Sisyphus --- backend/app/test/test_phase1_query.py | 24 +++++ .../test_phase3_query_history_integration.py | 89 +++++++++++-------- .../test_phase4_integration_query_pipeline.py | 44 ++++----- 3 files changed, 102 insertions(+), 55 deletions(-) diff --git a/backend/app/test/test_phase1_query.py b/backend/app/test/test_phase1_query.py index 954135b..8218493 100644 --- a/backend/app/test/test_phase1_query.py +++ b/backend/app/test/test_phase1_query.py @@ -48,6 +48,10 @@ class _MockLLMClient: return json.dumps({"0": [8.0, 7.5]}) return "- Bullet point answer\n- Another point" + async def complete_structured(self, prompt, pydantic_model, step_name="LLM"): + """Structured output path — raise to trigger legacy fallback.""" + raise RuntimeError("structured output not mocked") + class _MockLLMClientNoChunks: """LLM mock that returns decomposition but no relevant chunks survive filter.""" @@ -60,6 +64,10 @@ class _MockLLMClientNoChunks: return json.dumps({"0": [2.0, 1.5]}) return "I could not find any relevant information." + async def complete_structured(self, prompt, pydantic_model, step_name="LLM"): + """Structured output path — raise to trigger legacy fallback.""" + raise RuntimeError("structured output not mocked") + class _DeterministicEmbedding: """Lightweight embedding function that returns deterministic vectors. @@ -191,6 +199,10 @@ class TestQuery: "app.routers.query.LLMClient", lambda settings: _MockLLMClient(), ) + monkeypatch.setattr( + "app.routers.query.LLMClientDP", + lambda settings: _MockLLMClient(), + ) response = client.post( "/api/v1/query", @@ -231,6 +243,10 @@ class TestQuery: "app.routers.query.LLMClient", lambda settings: _MockLLMClientNoChunks(), ) + monkeypatch.setattr( + "app.routers.query.LLMClientDP", + lambda settings: _MockLLMClientNoChunks(), + ) response = client.post( "/api/v1/query", @@ -256,6 +272,10 @@ class TestQuery: "app.routers.query.LLMClient", lambda settings: _MockLLMClient(), ) + monkeypatch.setattr( + "app.routers.query.LLMClientDP", + lambda settings: _MockLLMClient(), + ) response = client.post( "/api/v1/query", @@ -270,6 +290,10 @@ class TestQuery: "app.routers.query.LLMClient", lambda settings: _MockLLMClient(), ) + monkeypatch.setattr( + "app.routers.query.LLMClientDP", + lambda settings: _MockLLMClient(), + ) response = client.post( "/api/v1/query", diff --git a/backend/app/test/test_phase3_query_history_integration.py b/backend/app/test/test_phase3_query_history_integration.py index b4774b4..ddf3dab 100644 --- a/backend/app/test/test_phase3_query_history_integration.py +++ b/backend/app/test/test_phase3_query_history_integration.py @@ -98,12 +98,39 @@ def _make_mock_llm_class(responses): return resp raise RuntimeError(f"No more mock responses (call #{self._idx + 1})") + async def complete_structured(self, prompt, pydantic_model, step_name="LLM"): + raise RuntimeError("structured output not mocked") + async def close(self): pass return _MockLLM +def _mock_both_llm_clients(monkeypatch, responses_or_class): + """Patch both LLMClient and LLMClientDP with the same mock. + + Accepts either a list of responses (uses _make_mock_llm_class) or + a class directly. + + When a list is provided, the first response goes to LLMClientDP + (decompose), and the remaining responses go to LLMClient + (filter + generate). + """ + if isinstance(responses_or_class, list): + monkeypatch.setattr( + "app.routers.query.LLMClientDP", + _make_mock_llm_class([responses_or_class[0]]), + ) + monkeypatch.setattr( + "app.routers.query.LLMClient", + _make_mock_llm_class(responses_or_class[1:]), + ) + else: + monkeypatch.setattr("app.routers.query.LLMClient", responses_or_class) + monkeypatch.setattr("app.routers.query.LLMClientDP", responses_or_class) + + # Standard mock responses for a successful 2-sub-question pipeline _STANDARD_RESPONSES = [ '["What are time extensions?", "What notice is required?"]', @@ -221,9 +248,7 @@ def test_query_pipeline_creates_history_record(tmp_path, monkeypatch): and ``profile_used = "A"``. """ env = _setup_env(tmp_path, monkeypatch) - monkeypatch.setattr( - "app.routers.query.LLMClient", _make_mock_llm_class(_STANDARD_RESPONSES) - ) + _mock_both_llm_clients(monkeypatch, _STANDARD_RESPONSES) from app.main import app @@ -266,9 +291,7 @@ def test_history_record_contains_prompts(tmp_path, monkeypatch): are stored as non-empty strings in the history record. """ env = _setup_env(tmp_path, monkeypatch) - monkeypatch.setattr( - "app.routers.query.LLMClient", _make_mock_llm_class(_STANDARD_RESPONSES) - ) + _mock_both_llm_clients(monkeypatch, _STANDARD_RESPONSES) from app.main import app @@ -290,9 +313,7 @@ def test_history_record_contains_chunk_xml(tmp_path, monkeypatch): ```` tags including Filename, Page, and Content fields. """ env = _setup_env(tmp_path, monkeypatch) - monkeypatch.setattr( - "app.routers.query.LLMClient", _make_mock_llm_class(_STANDARD_RESPONSES) - ) + _mock_both_llm_clients(monkeypatch, _STANDARD_RESPONSES) from app.main import app @@ -323,9 +344,7 @@ def test_history_record_contains_chunk_xml(tmp_path, monkeypatch): def test_history_record_contains_filtered_chunk_xml(tmp_path, monkeypatch): """Verify ``chunks_filtered`` XML contains ``Relevance`` scores.""" env = _setup_env(tmp_path, monkeypatch) - monkeypatch.setattr( - "app.routers.query.LLMClient", _make_mock_llm_class(_STANDARD_RESPONSES) - ) + _mock_both_llm_clients(monkeypatch, _STANDARD_RESPONSES) from app.main import app @@ -351,9 +370,7 @@ def test_history_record_contains_filtered_chunk_xml(tmp_path, monkeypatch): def test_history_timing_accurate(tmp_path, monkeypatch): """Verify all stage timing fields are positive integers.""" env = _setup_env(tmp_path, monkeypatch) - monkeypatch.setattr( - "app.routers.query.LLMClient", _make_mock_llm_class(_STANDARD_RESPONSES) - ) + _mock_both_llm_clients(monkeypatch, _STANDARD_RESPONSES) from app.main import app @@ -397,9 +414,7 @@ def test_history_count_fields_are_ints(tmp_path, monkeypatch): (scores 8.5, 9.0 > threshold 7.0) → 4 total filtered. """ env = _setup_env(tmp_path, monkeypatch) - monkeypatch.setattr( - "app.routers.query.LLMClient", _make_mock_llm_class(_STANDARD_RESPONSES) - ) + _mock_both_llm_clients(monkeypatch, _STANDARD_RESPONSES) from app.main import app @@ -440,9 +455,7 @@ def test_history_fire_and_forget(tmp_path, monkeypatch): if os.path.exists(env["history_db"]): os.remove(env["history_db"]) - monkeypatch.setattr( - "app.routers.query.LLMClient", _make_mock_llm_class(_STANDARD_RESPONSES) - ) + _mock_both_llm_clients(monkeypatch, _STANDARD_RESPONSES) from app.main import app @@ -467,14 +480,29 @@ def test_history_not_created_on_error(tmp_path, monkeypatch): async def complete(self, prompt, temperature=0.7, step_name="LLM"): self._call_count += 1 if self._call_count == 1: - return '["test question"]' - if self._call_count == 2: return '{"0": [8.5, 9.0]}' raise RuntimeError("LLM generate error") + async def complete_structured(self, prompt, pydantic_model, step_name="LLM"): + raise RuntimeError("structured output not mocked") + async def close(self): pass + class _DecomposeOnly: + def __init__(self, settings): + self.settings = settings + + async def complete(self, prompt, temperature=0.7, step_name="LLM"): + return '["test question"]' + + async def complete_structured(self, prompt, pydantic_model, step_name="LLM"): + raise RuntimeError("structured output not mocked") + + async def close(self): + pass + + monkeypatch.setattr("app.routers.query.LLMClientDP", _DecomposeOnly) monkeypatch.setattr("app.routers.query.LLMClient", _ErrorOnGenerateLLM) from app.main import app @@ -506,10 +534,7 @@ class TestPerSubQPipelineHistory: def test_per_subq_pipeline_records_history(self, tmp_path, monkeypatch): """Per-sub-q pipeline should record history with sub_question_sources.""" env = _setup_env(tmp_path, monkeypatch) - monkeypatch.setattr( - "app.routers.query.LLMClient", - _make_mock_llm_class(_STANDARD_RESPONSES), - ) + _mock_both_llm_clients(monkeypatch, _STANDARD_RESPONSES) from app.main import app @@ -537,10 +562,7 @@ class TestPerSubQPipelineHistory: def test_per_subq_history_contains_chunk_xml(self, tmp_path, monkeypatch): """History should contain XML-tagged chunks_retrieved and chunks_filtered.""" env = _setup_env(tmp_path, monkeypatch) - monkeypatch.setattr( - "app.routers.query.LLMClient", - _make_mock_llm_class(_STANDARD_RESPONSES), - ) + _mock_both_llm_clients(monkeypatch, _STANDARD_RESPONSES) from app.main import app @@ -560,10 +582,7 @@ class TestPerSubQPipelineHistory: def test_per_subq_history_prompts_are_strings(self, tmp_path, monkeypatch): """All prompt fields must be strings (non-empty with real services).""" env = _setup_env(tmp_path, monkeypatch) - monkeypatch.setattr( - "app.routers.query.LLMClient", - _make_mock_llm_class(_STANDARD_RESPONSES), - ) + _mock_both_llm_clients(monkeypatch, _STANDARD_RESPONSES) from app.main import app diff --git a/backend/app/test/test_phase4_integration_query_pipeline.py b/backend/app/test/test_phase4_integration_query_pipeline.py index 56445bc..9608754 100644 --- a/backend/app/test/test_phase4_integration_query_pipeline.py +++ b/backend/app/test/test_phase4_integration_query_pipeline.py @@ -87,12 +87,31 @@ def _make_mock_llm_class(responses): return resp raise RuntimeError(f"No more mock responses (call #{self._idx + 1})") + async def complete_structured(self, prompt, pydantic_model, step_name="LLM"): + raise RuntimeError("structured output not mocked") + async def close(self): pass return _MockLLM +def _mock_both_llm_clients(monkeypatch, responses): + """Patch both LLMClient and LLMClientDP with the same mock class. + + The first response goes to LLMClientDP (decompose), and the + remaining responses go to LLMClient (filter + generate). + """ + monkeypatch.setattr( + "app.routers.query.LLMClientDP", + _make_mock_llm_class([responses[0]]), + ) + monkeypatch.setattr( + "app.routers.query.LLMClient", + _make_mock_llm_class(responses[1:]), + ) + + def _setup_env(tmp_path, monkeypatch, seed_docs=None): """Set up real ChromaDB + SQLite via tmp_path for pipeline tests.""" seed_docs = seed_docs or SEED_DOCS @@ -174,10 +193,7 @@ def test_full_pipeline_with_two_subquestions(tmp_path, monkeypatch): "- Notify the project manager [NEC4.pdf, page 12]\n" ) - monkeypatch.setattr( - "app.routers.query.LLMClient", - _make_mock_llm_class([decompose_resp, filter_resp, generate_resp]), - ) + _mock_both_llm_clients(monkeypatch, [decompose_resp, filter_resp, generate_resp]) from app.main import app @@ -223,10 +239,7 @@ def test_pipeline_with_empty_decomposition(tmp_path, monkeypatch): "## Sub-question 1: What is the time limit?\n- Answer here\n" ) - monkeypatch.setattr( - "app.routers.query.LLMClient", - _make_mock_llm_class([decompose_resp, filter_resp, generate_resp]), - ) + _mock_both_llm_clients(monkeypatch, [decompose_resp, filter_resp, generate_resp]) from app.main import app @@ -251,10 +264,7 @@ def test_pipeline_single_subquestion(tmp_path, monkeypatch): filter_resp = '{"0": [8.5, 9.0]}' generate_resp = "## Sub-question 1: What is X?\n- Answer here\n" - monkeypatch.setattr( - "app.routers.query.LLMClient", - _make_mock_llm_class([decompose_resp, filter_resp, generate_resp]), - ) + _mock_both_llm_clients(monkeypatch, [decompose_resp, filter_resp, generate_resp]) from app.main import app @@ -274,10 +284,7 @@ def test_pipeline_filter_all_rejected(tmp_path, monkeypatch): # Both chunks score below threshold 7.0 filter_resp = '{"0": [2.0, 3.0]}' - monkeypatch.setattr( - "app.routers.query.LLMClient", - _make_mock_llm_class([decompose_resp, filter_resp]), - ) + _mock_both_llm_clients(monkeypatch, [decompose_resp, filter_resp]) from app.main import app @@ -308,10 +315,7 @@ def test_pipeline_retrieval_empty_for_one_subq(tmp_path, monkeypatch): "- No relevant information found.\n" ) - monkeypatch.setattr( - "app.routers.query.LLMClient", - _make_mock_llm_class([decompose_resp, filter_resp, generate_resp]), - ) + _mock_both_llm_clients(monkeypatch, [decompose_resp, filter_resp, generate_resp]) from app.main import app