From 226f4ed700e3dcd193a9a39e0bd700a063754a6b Mon Sep 17 00:00:00 2001
From: Woody <woody.ck.tse@gmail.com>
Date: Mon, 4 May 2026 14:59:23 +0800
Subject: [PATCH] test: update integration mocks for dual-client architecture
 (Phase 6)

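Add complete_structured() to the mock LLM classes; the mock always raises
so that the legacy fallback path is exercised. Split the mock response
lists so that the first response goes to LLMClientDP (decompose) and the
remaining responses go to LLMClient (filter + generate), and patch both
clients in all integration tests.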

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
---
 backend/app/test/test_phase1_query.py         | 24 +++++
 .../test_phase3_query_history_integration.py  | 89 +++++++++++--------
 .../test_phase4_integration_query_pipeline.py | 44 ++++-----
 3 files changed, 102 insertions(+), 55 deletions(-)

diff --git a/backend/app/test/test_phase1_query.py b/backend/app/test/test_phase1_query.py
index 954135b..8218493 100644
--- a/backend/app/test/test_phase1_query.py
+++ b/backend/app/test/test_phase1_query.py
@@ -48,6 +48,10 @@ class _MockLLMClient:
             return json.dumps({"0": [8.0, 7.5]})
         return "- Bullet point answer\n- Another point"
 
+    async def complete_structured(self, prompt, pydantic_model, step_name="LLM"):
+        """Structured output path — raise to trigger legacy fallback."""
+        raise RuntimeError("structured output not mocked")
+
 
 class _MockLLMClientNoChunks:
     """LLM mock that returns decomposition but no relevant chunks survive filter."""
@@ -60,6 +64,10 @@ class _MockLLMClientNoChunks:
             return json.dumps({"0": [2.0, 1.5]})
         return "I could not find any relevant information."
 
+    async def complete_structured(self, prompt, pydantic_model, step_name="LLM"):
+        """Structured output path — raise to trigger legacy fallback."""
+        raise RuntimeError("structured output not mocked")
+
 
 class _DeterministicEmbedding:
     """Lightweight embedding function that returns deterministic vectors.
@@ -191,6 +199,10 @@ class TestQuery:
             "app.routers.query.LLMClient",
             lambda settings: _MockLLMClient(),
         )
+        monkeypatch.setattr(
+            "app.routers.query.LLMClientDP",
+            lambda settings: _MockLLMClient(),
+        )
 
         response = client.post(
             "/api/v1/query",
@@ -231,6 +243,10 @@ class TestQuery:
             "app.routers.query.LLMClient",
             lambda settings: _MockLLMClientNoChunks(),
         )
+        monkeypatch.setattr(
+            "app.routers.query.LLMClientDP",
+            lambda settings: _MockLLMClientNoChunks(),
+        )
 
         response = client.post(
             "/api/v1/query",
@@ -256,6 +272,10 @@ class TestQuery:
             "app.routers.query.LLMClient",
             lambda settings: _MockLLMClient(),
         )
+        monkeypatch.setattr(
+            "app.routers.query.LLMClientDP",
+            lambda settings: _MockLLMClient(),
+        )
 
         response = client.post(
             "/api/v1/query",
@@ -270,6 +290,10 @@ class TestQuery:
             "app.routers.query.LLMClient",
             lambda settings: _MockLLMClient(),
         )
+        monkeypatch.setattr(
+            "app.routers.query.LLMClientDP",
+            lambda settings: _MockLLMClient(),
+        )
 
         response = client.post(
             "/api/v1/query",
diff --git a/backend/app/test/test_phase3_query_history_integration.py b/backend/app/test/test_phase3_query_history_integration.py
index b4774b4..ddf3dab 100644
--- a/backend/app/test/test_phase3_query_history_integration.py
+++ b/backend/app/test/test_phase3_query_history_integration.py
@@ -98,12 +98,39 @@ def _make_mock_llm_class(responses):
                 return resp
             raise RuntimeError(f"No more mock responses (call #{self._idx + 1})")
 
+        async def complete_structured(self, prompt, pydantic_model, step_name="LLM"):
+            raise RuntimeError("structured output not mocked")
+
         async def close(self):
             pass
 
     return _MockLLM
 
 
+def _mock_both_llm_clients(monkeypatch, responses_or_class):
+    """Patch both LLMClient and LLMClientDP on app.routers.query.
+
+    Accepts either a list of responses (uses _make_mock_llm_class) or
+    a class directly.
+
+    When a list is provided, the first response goes to LLMClientDP
+    (decompose), and the remaining responses go to LLMClient
+    (filter + generate).
+    """
+    if isinstance(responses_or_class, list):
+        monkeypatch.setattr(
+            "app.routers.query.LLMClientDP",
+            _make_mock_llm_class([responses_or_class[0]]),
+        )
+        monkeypatch.setattr(
+            "app.routers.query.LLMClient",
+            _make_mock_llm_class(responses_or_class[1:]),
+        )
+    else:
+        monkeypatch.setattr("app.routers.query.LLMClient", responses_or_class)
+        monkeypatch.setattr("app.routers.query.LLMClientDP", responses_or_class)
+
+
 # Standard mock responses for a successful 2-sub-question pipeline
 _STANDARD_RESPONSES = [
     '["What are time extensions?", "What notice is required?"]',
@@ -221,9 +248,7 @@ def test_query_pipeline_creates_history_record(tmp_path, monkeypatch):
     and ``profile_used = "A"``.
     """
     env = _setup_env(tmp_path, monkeypatch)
-    monkeypatch.setattr(
-        "app.routers.query.LLMClient", _make_mock_llm_class(_STANDARD_RESPONSES)
-    )
+    _mock_both_llm_clients(monkeypatch, _STANDARD_RESPONSES)
 
     from app.main import app
 
@@ -266,9 +291,7 @@ def test_history_record_contains_prompts(tmp_path, monkeypatch):
     are stored as non-empty strings in the history record.
     """
     env = _setup_env(tmp_path, monkeypatch)
-    monkeypatch.setattr(
-        "app.routers.query.LLMClient", _make_mock_llm_class(_STANDARD_RESPONSES)
-    )
+    _mock_both_llm_clients(monkeypatch, _STANDARD_RESPONSES)
 
     from app.main import app
 
@@ -290,9 +313,7 @@ def test_history_record_contains_chunk_xml(tmp_path, monkeypatch):
     ``<chunk_N>`` tags including Filename, Page, and Content fields.
     """
     env = _setup_env(tmp_path, monkeypatch)
-    monkeypatch.setattr(
-        "app.routers.query.LLMClient", _make_mock_llm_class(_STANDARD_RESPONSES)
-    )
+    _mock_both_llm_clients(monkeypatch, _STANDARD_RESPONSES)
 
     from app.main import app
 
@@ -323,9 +344,7 @@ def test_history_record_contains_chunk_xml(tmp_path, monkeypatch):
 def test_history_record_contains_filtered_chunk_xml(tmp_path, monkeypatch):
     """Verify ``chunks_filtered`` XML contains ``Relevance`` scores."""
     env = _setup_env(tmp_path, monkeypatch)
-    monkeypatch.setattr(
-        "app.routers.query.LLMClient", _make_mock_llm_class(_STANDARD_RESPONSES)
-    )
+    _mock_both_llm_clients(monkeypatch, _STANDARD_RESPONSES)
 
     from app.main import app
 
@@ -351,9 +370,7 @@ def test_history_record_contains_filtered_chunk_xml(tmp_path, monkeypatch):
 def test_history_timing_accurate(tmp_path, monkeypatch):
     """Verify all stage timing fields are positive integers."""
     env = _setup_env(tmp_path, monkeypatch)
-    monkeypatch.setattr(
-        "app.routers.query.LLMClient", _make_mock_llm_class(_STANDARD_RESPONSES)
-    )
+    _mock_both_llm_clients(monkeypatch, _STANDARD_RESPONSES)
 
     from app.main import app
 
@@ -397,9 +414,7 @@ def test_history_count_fields_are_ints(tmp_path, monkeypatch):
     (scores 8.5, 9.0 > threshold 7.0) → 4 total filtered.
     """
     env = _setup_env(tmp_path, monkeypatch)
-    monkeypatch.setattr(
-        "app.routers.query.LLMClient", _make_mock_llm_class(_STANDARD_RESPONSES)
-    )
+    _mock_both_llm_clients(monkeypatch, _STANDARD_RESPONSES)
 
     from app.main import app
 
@@ -440,9 +455,7 @@ def test_history_fire_and_forget(tmp_path, monkeypatch):
     if os.path.exists(env["history_db"]):
         os.remove(env["history_db"])
 
-    monkeypatch.setattr(
-        "app.routers.query.LLMClient", _make_mock_llm_class(_STANDARD_RESPONSES)
-    )
+    _mock_both_llm_clients(monkeypatch, _STANDARD_RESPONSES)
 
     from app.main import app
 
@@ -467,14 +480,29 @@ def test_history_not_created_on_error(tmp_path, monkeypatch):
         async def complete(self, prompt, temperature=0.7, step_name="LLM"):
             self._call_count += 1
             if self._call_count == 1:
-                return '["test question"]'
-            if self._call_count == 2:
                 return '{"0": [8.5, 9.0]}'
             raise RuntimeError("LLM generate error")
 
+        async def complete_structured(self, prompt, pydantic_model, step_name="LLM"):
+            raise RuntimeError("structured output not mocked")
+
         async def close(self):
             pass
 
+    class _DecomposeOnly:
+        def __init__(self, settings):
+            self.settings = settings
+
+        async def complete(self, prompt, temperature=0.7, step_name="LLM"):
+            return '["test question"]'
+
+        async def complete_structured(self, prompt, pydantic_model, step_name="LLM"):
+            raise RuntimeError("structured output not mocked")
+
+        async def close(self):
+            pass
+
+    monkeypatch.setattr("app.routers.query.LLMClientDP", _DecomposeOnly)
     monkeypatch.setattr("app.routers.query.LLMClient", _ErrorOnGenerateLLM)
 
     from app.main import app
@@ -506,10 +534,7 @@ class TestPerSubQPipelineHistory:
     def test_per_subq_pipeline_records_history(self, tmp_path, monkeypatch):
         """Per-sub-q pipeline should record history with sub_question_sources."""
         env = _setup_env(tmp_path, monkeypatch)
-        monkeypatch.setattr(
-            "app.routers.query.LLMClient",
-            _make_mock_llm_class(_STANDARD_RESPONSES),
-        )
+        _mock_both_llm_clients(monkeypatch, _STANDARD_RESPONSES)
 
         from app.main import app
 
@@ -537,10 +562,7 @@ class TestPerSubQPipelineHistory:
     def test_per_subq_history_contains_chunk_xml(self, tmp_path, monkeypatch):
         """History should contain XML-tagged chunks_retrieved and chunks_filtered."""
         env = _setup_env(tmp_path, monkeypatch)
-        monkeypatch.setattr(
-            "app.routers.query.LLMClient",
-            _make_mock_llm_class(_STANDARD_RESPONSES),
-        )
+        _mock_both_llm_clients(monkeypatch, _STANDARD_RESPONSES)
 
         from app.main import app
 
@@ -560,10 +582,7 @@ class TestPerSubQPipelineHistory:
     def test_per_subq_history_prompts_are_strings(self, tmp_path, monkeypatch):
         """All prompt fields must be strings (non-empty with real services)."""
         env = _setup_env(tmp_path, monkeypatch)
-        monkeypatch.setattr(
-            "app.routers.query.LLMClient",
-            _make_mock_llm_class(_STANDARD_RESPONSES),
-        )
+        _mock_both_llm_clients(monkeypatch, _STANDARD_RESPONSES)
 
         from app.main import app
 
diff --git a/backend/app/test/test_phase4_integration_query_pipeline.py b/backend/app/test/test_phase4_integration_query_pipeline.py
index 56445bc..9608754 100644
--- a/backend/app/test/test_phase4_integration_query_pipeline.py
+++ b/backend/app/test/test_phase4_integration_query_pipeline.py
@@ -87,12 +87,31 @@ def _make_mock_llm_class(responses):
                 return resp
             raise RuntimeError(f"No more mock responses (call #{self._idx + 1})")
 
+        async def complete_structured(self, prompt, pydantic_model, step_name="LLM"):
+            raise RuntimeError("structured output not mocked")
+
         async def close(self):
             pass
 
     return _MockLLM
 
 
+def _mock_both_llm_clients(monkeypatch, responses):
+    """Patch LLMClientDP and LLMClient with separate mock classes.
+
+    The first response goes to LLMClientDP (decompose), and the
+    remaining responses go to LLMClient (filter + generate).
+    """
+    monkeypatch.setattr(
+        "app.routers.query.LLMClientDP",
+        _make_mock_llm_class([responses[0]]),
+    )
+    monkeypatch.setattr(
+        "app.routers.query.LLMClient",
+        _make_mock_llm_class(responses[1:]),
+    )
+
+
 def _setup_env(tmp_path, monkeypatch, seed_docs=None):
     """Set up real ChromaDB + SQLite via tmp_path for pipeline tests."""
     seed_docs = seed_docs or SEED_DOCS
@@ -174,10 +193,7 @@ def test_full_pipeline_with_two_subquestions(tmp_path, monkeypatch):
         "- Notify the project manager [NEC4.pdf, page 12]\n"
     )
 
-    monkeypatch.setattr(
-        "app.routers.query.LLMClient",
-        _make_mock_llm_class([decompose_resp, filter_resp, generate_resp]),
-    )
+    _mock_both_llm_clients(monkeypatch, [decompose_resp, filter_resp, generate_resp])
 
     from app.main import app
 
@@ -223,10 +239,7 @@ def test_pipeline_with_empty_decomposition(tmp_path, monkeypatch):
         "## Sub-question 1: What is the time limit?\n- Answer here\n"
     )
 
-    monkeypatch.setattr(
-        "app.routers.query.LLMClient",
-        _make_mock_llm_class([decompose_resp, filter_resp, generate_resp]),
-    )
+    _mock_both_llm_clients(monkeypatch, [decompose_resp, filter_resp, generate_resp])
 
     from app.main import app
 
@@ -251,10 +264,7 @@ def test_pipeline_single_subquestion(tmp_path, monkeypatch):
     filter_resp = '{"0": [8.5, 9.0]}'
     generate_resp = "## Sub-question 1: What is X?\n- Answer here\n"
 
-    monkeypatch.setattr(
-        "app.routers.query.LLMClient",
-        _make_mock_llm_class([decompose_resp, filter_resp, generate_resp]),
-    )
+    _mock_both_llm_clients(monkeypatch, [decompose_resp, filter_resp, generate_resp])
 
     from app.main import app
 
@@ -274,10 +284,7 @@ def test_pipeline_filter_all_rejected(tmp_path, monkeypatch):
     # Both chunks score below threshold 7.0
     filter_resp = '{"0": [2.0, 3.0]}'
 
-    monkeypatch.setattr(
-        "app.routers.query.LLMClient",
-        _make_mock_llm_class([decompose_resp, filter_resp]),
-    )
+    _mock_both_llm_clients(monkeypatch, [decompose_resp, filter_resp])
 
     from app.main import app
 
@@ -308,10 +315,7 @@ def test_pipeline_retrieval_empty_for_one_subq(tmp_path, monkeypatch):
         "- No relevant information found.\n"
     )
 
-    monkeypatch.setattr(
-        "app.routers.query.LLMClient",
-        _make_mock_llm_class([decompose_resp, filter_resp, generate_resp]),
-    )
+    _mock_both_llm_clients(monkeypatch, [decompose_resp, filter_resp, generate_resp])
 
     from app.main import app