test: update integration mocks for dual-client architecture (Phase 6)
Added complete_structured() to the mock classes, split the response lists between LLMClientDP (decompose) and LLMClient (filter + generate), and patched both clients in all integration tests.

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
parent
3b5bd79839
commit
226f4ed700
|
|
@ -48,6 +48,10 @@ class _MockLLMClient:
|
||||||
return json.dumps({"0": [8.0, 7.5]})
|
return json.dumps({"0": [8.0, 7.5]})
|
||||||
return "- Bullet point answer\n- Another point"
|
return "- Bullet point answer\n- Another point"
|
||||||
|
|
||||||
|
async def complete_structured(self, prompt, pydantic_model, step_name="LLM"):
|
||||||
|
"""Structured output path — raise to trigger legacy fallback."""
|
||||||
|
raise RuntimeError("structured output not mocked")
|
||||||
|
|
||||||
|
|
||||||
class _MockLLMClientNoChunks:
|
class _MockLLMClientNoChunks:
|
||||||
"""LLM mock that returns decomposition but no relevant chunks survive filter."""
|
"""LLM mock that returns decomposition but no relevant chunks survive filter."""
|
||||||
|
|
@ -60,6 +64,10 @@ class _MockLLMClientNoChunks:
|
||||||
return json.dumps({"0": [2.0, 1.5]})
|
return json.dumps({"0": [2.0, 1.5]})
|
||||||
return "I could not find any relevant information."
|
return "I could not find any relevant information."
|
||||||
|
|
||||||
|
async def complete_structured(self, prompt, pydantic_model, step_name="LLM"):
|
||||||
|
"""Structured output path — raise to trigger legacy fallback."""
|
||||||
|
raise RuntimeError("structured output not mocked")
|
||||||
|
|
||||||
|
|
||||||
class _DeterministicEmbedding:
|
class _DeterministicEmbedding:
|
||||||
"""Lightweight embedding function that returns deterministic vectors.
|
"""Lightweight embedding function that returns deterministic vectors.
|
||||||
|
|
@ -191,6 +199,10 @@ class TestQuery:
|
||||||
"app.routers.query.LLMClient",
|
"app.routers.query.LLMClient",
|
||||||
lambda settings: _MockLLMClient(),
|
lambda settings: _MockLLMClient(),
|
||||||
)
|
)
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"app.routers.query.LLMClientDP",
|
||||||
|
lambda settings: _MockLLMClient(),
|
||||||
|
)
|
||||||
|
|
||||||
response = client.post(
|
response = client.post(
|
||||||
"/api/v1/query",
|
"/api/v1/query",
|
||||||
|
|
@ -231,6 +243,10 @@ class TestQuery:
|
||||||
"app.routers.query.LLMClient",
|
"app.routers.query.LLMClient",
|
||||||
lambda settings: _MockLLMClientNoChunks(),
|
lambda settings: _MockLLMClientNoChunks(),
|
||||||
)
|
)
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"app.routers.query.LLMClientDP",
|
||||||
|
lambda settings: _MockLLMClientNoChunks(),
|
||||||
|
)
|
||||||
|
|
||||||
response = client.post(
|
response = client.post(
|
||||||
"/api/v1/query",
|
"/api/v1/query",
|
||||||
|
|
@ -256,6 +272,10 @@ class TestQuery:
|
||||||
"app.routers.query.LLMClient",
|
"app.routers.query.LLMClient",
|
||||||
lambda settings: _MockLLMClient(),
|
lambda settings: _MockLLMClient(),
|
||||||
)
|
)
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"app.routers.query.LLMClientDP",
|
||||||
|
lambda settings: _MockLLMClient(),
|
||||||
|
)
|
||||||
|
|
||||||
response = client.post(
|
response = client.post(
|
||||||
"/api/v1/query",
|
"/api/v1/query",
|
||||||
|
|
@ -270,6 +290,10 @@ class TestQuery:
|
||||||
"app.routers.query.LLMClient",
|
"app.routers.query.LLMClient",
|
||||||
lambda settings: _MockLLMClient(),
|
lambda settings: _MockLLMClient(),
|
||||||
)
|
)
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"app.routers.query.LLMClientDP",
|
||||||
|
lambda settings: _MockLLMClient(),
|
||||||
|
)
|
||||||
|
|
||||||
response = client.post(
|
response = client.post(
|
||||||
"/api/v1/query",
|
"/api/v1/query",
|
||||||
|
|
|
||||||
|
|
@ -98,12 +98,39 @@ def _make_mock_llm_class(responses):
|
||||||
return resp
|
return resp
|
||||||
raise RuntimeError(f"No more mock responses (call #{self._idx + 1})")
|
raise RuntimeError(f"No more mock responses (call #{self._idx + 1})")
|
||||||
|
|
||||||
|
async def complete_structured(self, prompt, pydantic_model, step_name="LLM"):
|
||||||
|
raise RuntimeError("structured output not mocked")
|
||||||
|
|
||||||
async def close(self):
|
async def close(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
return _MockLLM
|
return _MockLLM
|
||||||
|
|
||||||
|
|
||||||
|
def _mock_both_llm_clients(monkeypatch, responses_or_class):
|
||||||
|
"""Patch both LLMClient and LLMClientDP with the same mock.
|
||||||
|
|
||||||
|
Accepts either a list of responses (uses _make_mock_llm_class) or
|
||||||
|
a class directly.
|
||||||
|
|
||||||
|
When a list is provided, the first response goes to LLMClientDP
|
||||||
|
(decompose), and the remaining responses go to LLMClient
|
||||||
|
(filter + generate).
|
||||||
|
"""
|
||||||
|
if isinstance(responses_or_class, list):
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"app.routers.query.LLMClientDP",
|
||||||
|
_make_mock_llm_class([responses_or_class[0]]),
|
||||||
|
)
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"app.routers.query.LLMClient",
|
||||||
|
_make_mock_llm_class(responses_or_class[1:]),
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
monkeypatch.setattr("app.routers.query.LLMClient", responses_or_class)
|
||||||
|
monkeypatch.setattr("app.routers.query.LLMClientDP", responses_or_class)
|
||||||
|
|
||||||
|
|
||||||
# Standard mock responses for a successful 2-sub-question pipeline
|
# Standard mock responses for a successful 2-sub-question pipeline
|
||||||
_STANDARD_RESPONSES = [
|
_STANDARD_RESPONSES = [
|
||||||
'["What are time extensions?", "What notice is required?"]',
|
'["What are time extensions?", "What notice is required?"]',
|
||||||
|
|
@ -221,9 +248,7 @@ def test_query_pipeline_creates_history_record(tmp_path, monkeypatch):
|
||||||
and ``profile_used = "A"``.
|
and ``profile_used = "A"``.
|
||||||
"""
|
"""
|
||||||
env = _setup_env(tmp_path, monkeypatch)
|
env = _setup_env(tmp_path, monkeypatch)
|
||||||
monkeypatch.setattr(
|
_mock_both_llm_clients(monkeypatch, _STANDARD_RESPONSES)
|
||||||
"app.routers.query.LLMClient", _make_mock_llm_class(_STANDARD_RESPONSES)
|
|
||||||
)
|
|
||||||
|
|
||||||
from app.main import app
|
from app.main import app
|
||||||
|
|
||||||
|
|
@ -266,9 +291,7 @@ def test_history_record_contains_prompts(tmp_path, monkeypatch):
|
||||||
are stored as non-empty strings in the history record.
|
are stored as non-empty strings in the history record.
|
||||||
"""
|
"""
|
||||||
env = _setup_env(tmp_path, monkeypatch)
|
env = _setup_env(tmp_path, monkeypatch)
|
||||||
monkeypatch.setattr(
|
_mock_both_llm_clients(monkeypatch, _STANDARD_RESPONSES)
|
||||||
"app.routers.query.LLMClient", _make_mock_llm_class(_STANDARD_RESPONSES)
|
|
||||||
)
|
|
||||||
|
|
||||||
from app.main import app
|
from app.main import app
|
||||||
|
|
||||||
|
|
@ -290,9 +313,7 @@ def test_history_record_contains_chunk_xml(tmp_path, monkeypatch):
|
||||||
``<chunk_N>`` tags including Filename, Page, and Content fields.
|
``<chunk_N>`` tags including Filename, Page, and Content fields.
|
||||||
"""
|
"""
|
||||||
env = _setup_env(tmp_path, monkeypatch)
|
env = _setup_env(tmp_path, monkeypatch)
|
||||||
monkeypatch.setattr(
|
_mock_both_llm_clients(monkeypatch, _STANDARD_RESPONSES)
|
||||||
"app.routers.query.LLMClient", _make_mock_llm_class(_STANDARD_RESPONSES)
|
|
||||||
)
|
|
||||||
|
|
||||||
from app.main import app
|
from app.main import app
|
||||||
|
|
||||||
|
|
@ -323,9 +344,7 @@ def test_history_record_contains_chunk_xml(tmp_path, monkeypatch):
|
||||||
def test_history_record_contains_filtered_chunk_xml(tmp_path, monkeypatch):
|
def test_history_record_contains_filtered_chunk_xml(tmp_path, monkeypatch):
|
||||||
"""Verify ``chunks_filtered`` XML contains ``Relevance`` scores."""
|
"""Verify ``chunks_filtered`` XML contains ``Relevance`` scores."""
|
||||||
env = _setup_env(tmp_path, monkeypatch)
|
env = _setup_env(tmp_path, monkeypatch)
|
||||||
monkeypatch.setattr(
|
_mock_both_llm_clients(monkeypatch, _STANDARD_RESPONSES)
|
||||||
"app.routers.query.LLMClient", _make_mock_llm_class(_STANDARD_RESPONSES)
|
|
||||||
)
|
|
||||||
|
|
||||||
from app.main import app
|
from app.main import app
|
||||||
|
|
||||||
|
|
@ -351,9 +370,7 @@ def test_history_record_contains_filtered_chunk_xml(tmp_path, monkeypatch):
|
||||||
def test_history_timing_accurate(tmp_path, monkeypatch):
|
def test_history_timing_accurate(tmp_path, monkeypatch):
|
||||||
"""Verify all stage timing fields are positive integers."""
|
"""Verify all stage timing fields are positive integers."""
|
||||||
env = _setup_env(tmp_path, monkeypatch)
|
env = _setup_env(tmp_path, monkeypatch)
|
||||||
monkeypatch.setattr(
|
_mock_both_llm_clients(monkeypatch, _STANDARD_RESPONSES)
|
||||||
"app.routers.query.LLMClient", _make_mock_llm_class(_STANDARD_RESPONSES)
|
|
||||||
)
|
|
||||||
|
|
||||||
from app.main import app
|
from app.main import app
|
||||||
|
|
||||||
|
|
@ -397,9 +414,7 @@ def test_history_count_fields_are_ints(tmp_path, monkeypatch):
|
||||||
(scores 8.5, 9.0 > threshold 7.0) → 4 total filtered.
|
(scores 8.5, 9.0 > threshold 7.0) → 4 total filtered.
|
||||||
"""
|
"""
|
||||||
env = _setup_env(tmp_path, monkeypatch)
|
env = _setup_env(tmp_path, monkeypatch)
|
||||||
monkeypatch.setattr(
|
_mock_both_llm_clients(monkeypatch, _STANDARD_RESPONSES)
|
||||||
"app.routers.query.LLMClient", _make_mock_llm_class(_STANDARD_RESPONSES)
|
|
||||||
)
|
|
||||||
|
|
||||||
from app.main import app
|
from app.main import app
|
||||||
|
|
||||||
|
|
@ -440,9 +455,7 @@ def test_history_fire_and_forget(tmp_path, monkeypatch):
|
||||||
if os.path.exists(env["history_db"]):
|
if os.path.exists(env["history_db"]):
|
||||||
os.remove(env["history_db"])
|
os.remove(env["history_db"])
|
||||||
|
|
||||||
monkeypatch.setattr(
|
_mock_both_llm_clients(monkeypatch, _STANDARD_RESPONSES)
|
||||||
"app.routers.query.LLMClient", _make_mock_llm_class(_STANDARD_RESPONSES)
|
|
||||||
)
|
|
||||||
|
|
||||||
from app.main import app
|
from app.main import app
|
||||||
|
|
||||||
|
|
@ -467,14 +480,29 @@ def test_history_not_created_on_error(tmp_path, monkeypatch):
|
||||||
async def complete(self, prompt, temperature=0.7, step_name="LLM"):
|
async def complete(self, prompt, temperature=0.7, step_name="LLM"):
|
||||||
self._call_count += 1
|
self._call_count += 1
|
||||||
if self._call_count == 1:
|
if self._call_count == 1:
|
||||||
return '["test question"]'
|
|
||||||
if self._call_count == 2:
|
|
||||||
return '{"0": [8.5, 9.0]}'
|
return '{"0": [8.5, 9.0]}'
|
||||||
raise RuntimeError("LLM generate error")
|
raise RuntimeError("LLM generate error")
|
||||||
|
|
||||||
|
async def complete_structured(self, prompt, pydantic_model, step_name="LLM"):
|
||||||
|
raise RuntimeError("structured output not mocked")
|
||||||
|
|
||||||
async def close(self):
|
async def close(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
class _DecomposeOnly:
|
||||||
|
def __init__(self, settings):
|
||||||
|
self.settings = settings
|
||||||
|
|
||||||
|
async def complete(self, prompt, temperature=0.7, step_name="LLM"):
|
||||||
|
return '["test question"]'
|
||||||
|
|
||||||
|
async def complete_structured(self, prompt, pydantic_model, step_name="LLM"):
|
||||||
|
raise RuntimeError("structured output not mocked")
|
||||||
|
|
||||||
|
async def close(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
monkeypatch.setattr("app.routers.query.LLMClientDP", _DecomposeOnly)
|
||||||
monkeypatch.setattr("app.routers.query.LLMClient", _ErrorOnGenerateLLM)
|
monkeypatch.setattr("app.routers.query.LLMClient", _ErrorOnGenerateLLM)
|
||||||
|
|
||||||
from app.main import app
|
from app.main import app
|
||||||
|
|
@ -506,10 +534,7 @@ class TestPerSubQPipelineHistory:
|
||||||
def test_per_subq_pipeline_records_history(self, tmp_path, monkeypatch):
|
def test_per_subq_pipeline_records_history(self, tmp_path, monkeypatch):
|
||||||
"""Per-sub-q pipeline should record history with sub_question_sources."""
|
"""Per-sub-q pipeline should record history with sub_question_sources."""
|
||||||
env = _setup_env(tmp_path, monkeypatch)
|
env = _setup_env(tmp_path, monkeypatch)
|
||||||
monkeypatch.setattr(
|
_mock_both_llm_clients(monkeypatch, _STANDARD_RESPONSES)
|
||||||
"app.routers.query.LLMClient",
|
|
||||||
_make_mock_llm_class(_STANDARD_RESPONSES),
|
|
||||||
)
|
|
||||||
|
|
||||||
from app.main import app
|
from app.main import app
|
||||||
|
|
||||||
|
|
@ -537,10 +562,7 @@ class TestPerSubQPipelineHistory:
|
||||||
def test_per_subq_history_contains_chunk_xml(self, tmp_path, monkeypatch):
|
def test_per_subq_history_contains_chunk_xml(self, tmp_path, monkeypatch):
|
||||||
"""History should contain XML-tagged chunks_retrieved and chunks_filtered."""
|
"""History should contain XML-tagged chunks_retrieved and chunks_filtered."""
|
||||||
env = _setup_env(tmp_path, monkeypatch)
|
env = _setup_env(tmp_path, monkeypatch)
|
||||||
monkeypatch.setattr(
|
_mock_both_llm_clients(monkeypatch, _STANDARD_RESPONSES)
|
||||||
"app.routers.query.LLMClient",
|
|
||||||
_make_mock_llm_class(_STANDARD_RESPONSES),
|
|
||||||
)
|
|
||||||
|
|
||||||
from app.main import app
|
from app.main import app
|
||||||
|
|
||||||
|
|
@ -560,10 +582,7 @@ class TestPerSubQPipelineHistory:
|
||||||
def test_per_subq_history_prompts_are_strings(self, tmp_path, monkeypatch):
|
def test_per_subq_history_prompts_are_strings(self, tmp_path, monkeypatch):
|
||||||
"""All prompt fields must be strings (non-empty with real services)."""
|
"""All prompt fields must be strings (non-empty with real services)."""
|
||||||
env = _setup_env(tmp_path, monkeypatch)
|
env = _setup_env(tmp_path, monkeypatch)
|
||||||
monkeypatch.setattr(
|
_mock_both_llm_clients(monkeypatch, _STANDARD_RESPONSES)
|
||||||
"app.routers.query.LLMClient",
|
|
||||||
_make_mock_llm_class(_STANDARD_RESPONSES),
|
|
||||||
)
|
|
||||||
|
|
||||||
from app.main import app
|
from app.main import app
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -87,12 +87,31 @@ def _make_mock_llm_class(responses):
|
||||||
return resp
|
return resp
|
||||||
raise RuntimeError(f"No more mock responses (call #{self._idx + 1})")
|
raise RuntimeError(f"No more mock responses (call #{self._idx + 1})")
|
||||||
|
|
||||||
|
async def complete_structured(self, prompt, pydantic_model, step_name="LLM"):
|
||||||
|
raise RuntimeError("structured output not mocked")
|
||||||
|
|
||||||
async def close(self):
|
async def close(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
return _MockLLM
|
return _MockLLM
|
||||||
|
|
||||||
|
|
||||||
|
def _mock_both_llm_clients(monkeypatch, responses):
|
||||||
|
"""Patch both LLMClient and LLMClientDP with the same mock class.
|
||||||
|
|
||||||
|
The first response goes to LLMClientDP (decompose), and the
|
||||||
|
remaining responses go to LLMClient (filter + generate).
|
||||||
|
"""
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"app.routers.query.LLMClientDP",
|
||||||
|
_make_mock_llm_class([responses[0]]),
|
||||||
|
)
|
||||||
|
monkeypatch.setattr(
|
||||||
|
"app.routers.query.LLMClient",
|
||||||
|
_make_mock_llm_class(responses[1:]),
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def _setup_env(tmp_path, monkeypatch, seed_docs=None):
|
def _setup_env(tmp_path, monkeypatch, seed_docs=None):
|
||||||
"""Set up real ChromaDB + SQLite via tmp_path for pipeline tests."""
|
"""Set up real ChromaDB + SQLite via tmp_path for pipeline tests."""
|
||||||
seed_docs = seed_docs or SEED_DOCS
|
seed_docs = seed_docs or SEED_DOCS
|
||||||
|
|
@ -174,10 +193,7 @@ def test_full_pipeline_with_two_subquestions(tmp_path, monkeypatch):
|
||||||
"- Notify the project manager [NEC4.pdf, page 12]\n"
|
"- Notify the project manager [NEC4.pdf, page 12]\n"
|
||||||
)
|
)
|
||||||
|
|
||||||
monkeypatch.setattr(
|
_mock_both_llm_clients(monkeypatch, [decompose_resp, filter_resp, generate_resp])
|
||||||
"app.routers.query.LLMClient",
|
|
||||||
_make_mock_llm_class([decompose_resp, filter_resp, generate_resp]),
|
|
||||||
)
|
|
||||||
|
|
||||||
from app.main import app
|
from app.main import app
|
||||||
|
|
||||||
|
|
@ -223,10 +239,7 @@ def test_pipeline_with_empty_decomposition(tmp_path, monkeypatch):
|
||||||
"## Sub-question 1: What is the time limit?\n- Answer here\n"
|
"## Sub-question 1: What is the time limit?\n- Answer here\n"
|
||||||
)
|
)
|
||||||
|
|
||||||
monkeypatch.setattr(
|
_mock_both_llm_clients(monkeypatch, [decompose_resp, filter_resp, generate_resp])
|
||||||
"app.routers.query.LLMClient",
|
|
||||||
_make_mock_llm_class([decompose_resp, filter_resp, generate_resp]),
|
|
||||||
)
|
|
||||||
|
|
||||||
from app.main import app
|
from app.main import app
|
||||||
|
|
||||||
|
|
@ -251,10 +264,7 @@ def test_pipeline_single_subquestion(tmp_path, monkeypatch):
|
||||||
filter_resp = '{"0": [8.5, 9.0]}'
|
filter_resp = '{"0": [8.5, 9.0]}'
|
||||||
generate_resp = "## Sub-question 1: What is X?\n- Answer here\n"
|
generate_resp = "## Sub-question 1: What is X?\n- Answer here\n"
|
||||||
|
|
||||||
monkeypatch.setattr(
|
_mock_both_llm_clients(monkeypatch, [decompose_resp, filter_resp, generate_resp])
|
||||||
"app.routers.query.LLMClient",
|
|
||||||
_make_mock_llm_class([decompose_resp, filter_resp, generate_resp]),
|
|
||||||
)
|
|
||||||
|
|
||||||
from app.main import app
|
from app.main import app
|
||||||
|
|
||||||
|
|
@ -274,10 +284,7 @@ def test_pipeline_filter_all_rejected(tmp_path, monkeypatch):
|
||||||
# Both chunks score below threshold 7.0
|
# Both chunks score below threshold 7.0
|
||||||
filter_resp = '{"0": [2.0, 3.0]}'
|
filter_resp = '{"0": [2.0, 3.0]}'
|
||||||
|
|
||||||
monkeypatch.setattr(
|
_mock_both_llm_clients(monkeypatch, [decompose_resp, filter_resp])
|
||||||
"app.routers.query.LLMClient",
|
|
||||||
_make_mock_llm_class([decompose_resp, filter_resp]),
|
|
||||||
)
|
|
||||||
|
|
||||||
from app.main import app
|
from app.main import app
|
||||||
|
|
||||||
|
|
@ -308,10 +315,7 @@ def test_pipeline_retrieval_empty_for_one_subq(tmp_path, monkeypatch):
|
||||||
"- No relevant information found.\n"
|
"- No relevant information found.\n"
|
||||||
)
|
)
|
||||||
|
|
||||||
monkeypatch.setattr(
|
_mock_both_llm_clients(monkeypatch, [decompose_resp, filter_resp, generate_resp])
|
||||||
"app.routers.query.LLMClient",
|
|
||||||
_make_mock_llm_class([decompose_resp, filter_resp, generate_resp]),
|
|
||||||
)
|
|
||||||
|
|
||||||
from app.main import app
|
from app.main import app
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue