test(backend): add Phase 4 unit tests for retrieval and filtering
10 tests for retrieve_per_subquestion() covering multi-sub-q, empty, single, call counting, n_results passthrough, and empty results. 14 tests for filter_per_subquestion() covering basic filtering, threshold behavior, JSON parsing edge cases, markdown extraction, LLM exceptions, and format helpers. Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent) Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
parent
0ecae11bf8
commit
ab6ec28de6
|
|
@ -0,0 +1,102 @@
|
||||||
|
"""Tests for format_chunks_filtered_per_subq() helper — Phase 4.2.
|
||||||
|
|
||||||
|
Covers the XML formatting of per-sub-question filtered chunks:
|
||||||
|
- Normal multi-sub-question output
|
||||||
|
- Empty results
|
||||||
|
- Single sub-question
|
||||||
|
"""
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from app.routers.query import format_chunks_filtered_per_subq
|
||||||
|
|
||||||
|
|
||||||
|
def test_format_chunks_filtered_per_subq():
    """Render two sub-questions with scored chunks and inspect the XML.

    The output is expected to contain an indexed ``<sub_q>`` wrapper for
    each question, numbered chunk tags, the Relevance/Filename/Page
    metadata lines, and the chunk text itself.
    """
    chunks_for_a = [
        ("chunk A1 text", {"filename": "a.pdf", "relevance_score": 8.5}),
        ("chunk A2 text", {"filename": "a2.pdf", "relevance_score": 3.2}),
    ]
    chunks_for_b = [
        ("chunk B1 text", {"filename": "b.pdf", "page_number": 5, "relevance_score": 9.0}),
    ]

    rendered = format_chunks_filtered_per_subq(
        [("What is A?", chunks_for_a), ("What is B?", chunks_for_b)]
    )

    expected_fragments = (
        # Sub-question wrappers
        '<sub_q idx="0" question="What is A?">',
        '<sub_q idx="1" question="What is B?">',
        "</sub_q>",
        # Chunk tags and their metadata lines
        "<chunk_1>",
        "<chunk_2>",
        "Relevance: 8.5",
        "Relevance: 9.0",
        "Relevance: 3.2",
        "Filename: a.pdf",
        "Filename: b.pdf",
        "Page: 5",
        # Chunk bodies
        "chunk A1 text",
        "chunk B1 text",
    )
    for fragment in expected_fragments:
        assert fragment in rendered
|
||||||
|
|
||||||
|
|
||||||
|
def test_format_chunks_filtered_per_subq_empty():
    """An empty results list must render as the empty string."""
    rendered = format_chunks_filtered_per_subq([])
    assert rendered == ""
|
||||||
|
|
||||||
|
|
||||||
|
def test_format_chunks_filtered_per_subq_single_subq():
    """A lone sub-question holding one chunk is wrapped and fully rendered."""
    only_chunk = ("only chunk text", {"filename": "doc.pdf", "relevance_score": 9.5})

    rendered = format_chunks_filtered_per_subq([("Only question?", [only_chunk])])

    expected_fragments = (
        '<sub_q idx="0" question="Only question?">',
        "<chunk_1>",
        "Relevance: 9.5",
        "only chunk text",
        "</sub_q>",
    )
    for fragment in expected_fragments:
        assert fragment in rendered
|
||||||
|
|
||||||
|
|
||||||
|
def test_format_chunks_filtered_per_subq_no_page_number():
    """Metadata lacking ``page_number`` must not produce a ``Page:`` line."""
    metadata = {"filename": "f.pdf", "relevance_score": 8.0}

    rendered = format_chunks_filtered_per_subq([("Q?", [("text", metadata)])])

    assert "Page:" not in rendered
    # The relevance line is still emitted even though the page is absent.
    assert "Relevance: 8.0" in rendered
|
||||||
|
|
||||||
|
|
||||||
|
def test_format_chunks_filtered_per_subq_no_relevance_score():
    """Metadata lacking ``relevance_score`` is rendered as ``Relevance: N/A``."""
    unscored_chunk = ("text", {"filename": "f.pdf"})

    rendered = format_chunks_filtered_per_subq([("Q?", [unscored_chunk])])

    assert "Relevance: N/A" in rendered
|
||||||
|
|
@ -0,0 +1,78 @@
|
||||||
|
"""Phase 4 tests: Per-sub-question XML formatting in query router.
|
||||||
|
|
||||||
|
Covers:
|
||||||
|
- format_chunks_retrieved_per_subq with multiple sub-questions
|
||||||
|
- format_chunks_retrieved_per_subq with empty results
|
||||||
|
- format_chunks_retrieved_per_subq with single sub-question
|
||||||
|
- XML special character escaping in question attributes
|
||||||
|
"""
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from app.routers.query import format_chunks_retrieved_per_subq
|
||||||
|
|
||||||
|
|
||||||
|
class TestFormatChunksRetrievedPerSubq:
    """Exercise format_chunks_retrieved_per_subq() on retrieved-chunk tuples.

    Each chunk passed in is a ``(text, metadata, distance)`` triple.
    """

    def test_format_chunks_retrieved_per_subq(self):
        """Several sub-questions each get their own indexed <sub_q> wrapper."""
        chunks_for_a = [
            ("chunk A1", {"filename": "a.pdf"}, 0.1),
            ("chunk A2", {"filename": "a2.pdf", "page_number": 3}, 0.2),
        ]
        chunks_for_b = [("chunk B1", {"filename": "b.pdf"}, 0.3)]

        rendered = format_chunks_retrieved_per_subq(
            [("What is A?", chunks_for_a), ("What is B?", chunks_for_b)]
        )

        expected_fragments = (
            '<sub_q idx="0" question="What is A?">',
            '<sub_q idx="1" question="What is B?">',
            "</sub_q>",
            "<chunk_1>",
            "<chunk_2>",
            "chunk A1",
            "chunk A2",
            "chunk B1",
            "a.pdf",
            "Page: 3",
        )
        for fragment in expected_fragments:
            assert fragment in rendered

    def test_format_chunks_retrieved_per_subq_empty(self):
        """No results at all renders as the empty string."""
        assert format_chunks_retrieved_per_subq([]) == ""

    def test_format_chunks_retrieved_per_subq_single_subq(self):
        """One sub-question yields exactly one <sub_q> wrapper."""
        lone_entry = ("Only one?", [("chunk X", {"filename": "x.pdf"}, 0.1)])

        rendered = format_chunks_retrieved_per_subq([lone_entry])

        for fragment in ('<sub_q idx="0" question="Only one?">', "</sub_q>", "chunk X"):
            assert fragment in rendered
        # Counting the opening-tag prefix guards against duplicate wrappers.
        assert rendered.count("<sub_q") == 1

    def test_format_chunks_retrieved_per_subq_escapes_xml(self):
        """Question text containing quotes, ``&`` and angle brackets appears in the attribute.

        NOTE(review): the test name says "escapes", yet the expected literal
        below contains the raw characters. Confirm whether the formatter is
        meant to emit the text verbatim or entity-escaped.
        """
        tricky_question = 'What about "quotes" & <brackets>?'
        payload = [(tricky_question, [("data", {"filename": "f.pdf"}, 0.1)])]

        rendered = format_chunks_retrieved_per_subq(payload)

        assert 'question="What about "quotes" & <brackets>?"' in rendered
|
||||||
|
|
@ -0,0 +1,214 @@
|
||||||
|
"""Tests for RelevanceFilter.filter_per_subquestion() — Phase 4.2.
|
||||||
|
|
||||||
|
Covers per-sub-question chunk filtering in a single LLM call:
|
||||||
|
- Basic scoring and threshold filtering per sub-question
|
||||||
|
- Empty inputs and edge cases
|
||||||
|
- Invalid JSON / score-count mismatch error handling
|
||||||
|
- Threshold boundary behaviour (strict >)
|
||||||
|
"""
|
||||||
|
import json
|
||||||
|
import pytest
|
||||||
|
from unittest.mock import AsyncMock, MagicMock
|
||||||
|
|
||||||
|
from app.services.relevance_filter import RelevanceFilter
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Test: basic per-sub-question filtering
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
async def test_filter_per_subq_basic(mock_prompt_service):
    """Score two sub-questions in one LLM call and keep chunks above the threshold.

    The mocked LLM answers with a JSON object keyed by sub-question index;
    with ``threshold=7.0`` only the 8.5 and 9.0 chunks should survive, each
    with ``relevance_score`` added to its metadata.
    """
    llm = MagicMock()
    llm.complete = AsyncMock(return_value='{"0": [8.5, 3.2], "1": [9.0]}')
    relevance_filter = RelevanceFilter(llm, prompt_service=mock_prompt_service)

    questions = ["What is A?", "What is B?"]
    chunks_per_question = [
        [("chunk A1", {"filename": "a.pdf"}), ("chunk A2", {"filename": "a2.pdf"})],
        [("chunk B1", {"filename": "b.pdf"})],
    ]
    results, prompt = await relevance_filter.filter_per_subquestion(
        questions,
        chunks_per_question,
        threshold=7.0,
    )

    # One (question, kept_chunks) entry per sub-question, in input order.
    assert len(results) == 2
    entry_a, entry_b = results[0], results[1]
    assert entry_a[0] == "What is A?"
    assert entry_b[0] == "What is B?"

    # Sub-question 0: 3.2 is dropped, 8.5 is kept.
    assert len(entry_a[1]) == 1
    kept_a = entry_a[1][0]
    assert kept_a[0] == "chunk A1"
    assert kept_a[1]["relevance_score"] == 8.5
    assert kept_a[1]["filename"] == "a.pdf"

    # Sub-question 1: the single 9.0 chunk is kept.
    assert len(entry_b[1]) == 1
    kept_b = entry_b[1][0]
    assert kept_b[0] == "chunk B1"
    assert kept_b[1]["relevance_score"] == 9.0

    # The prompt sent to the LLM labels each sub-question, and only one call is made.
    assert prompt != ""
    for label in ("Sub-question 0", "Sub-question 1"):
        assert label in prompt
    llm.complete.assert_called_once()
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Test: empty input
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
async def test_filter_per_subq_empty_input(mock_prompt_service):
    """With no sub-questions the filter returns ``([], "")`` and never calls the LLM."""
    llm = MagicMock()
    llm.complete = AsyncMock()
    relevance_filter = RelevanceFilter(llm, prompt_service=mock_prompt_service)

    outcome = await relevance_filter.filter_per_subquestion([], [], threshold=7.0)
    filtered, prompt = outcome

    assert filtered == []
    assert prompt == ""
    llm.complete.assert_not_called()
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Test: sub-questions with all-empty chunk lists
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
async def test_filter_per_subq_all_empty_chunks(mock_prompt_service):
    """Sub-questions with no chunks come back paired with empty lists, without an LLM call."""
    llm = MagicMock()
    llm.complete = AsyncMock()
    relevance_filter = RelevanceFilter(llm, prompt_service=mock_prompt_service)

    questions = ["What is A?", "What is B?"]
    results, prompt = await relevance_filter.filter_per_subquestion(
        questions,
        [[], []],
        threshold=7.0,
    )

    assert len(results) == 2
    for entry, question in zip(results, questions):
        assert entry[0] == question
        assert entry[1] == []
    # There is nothing to score, so the LLM must stay untouched.
    llm.complete.assert_not_called()
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Test: LLM returns invalid JSON
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
async def test_filter_per_subq_llm_returns_invalid_json(mock_prompt_service):
    """A non-JSON LLM reply yields an empty result list alongside the built prompt."""
    llm = MagicMock()
    llm.complete = AsyncMock(return_value="not json at all")
    relevance_filter = RelevanceFilter(llm, prompt_service=mock_prompt_service)

    single_chunk = ("chunk A1", {"filename": "a.pdf"})
    results, prompt = await relevance_filter.filter_per_subquestion(
        ["What is A?"],
        [[single_chunk]],
        threshold=7.0,
    )

    assert results == []
    assert prompt != ""
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Test: score count mismatch
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
async def test_filter_per_subq_score_count_mismatch(mock_prompt_service):
    """Two chunks but a single returned score yields an empty result list plus the prompt."""
    llm = MagicMock()
    llm.complete = AsyncMock(return_value='{"0": [8.5]}')
    relevance_filter = RelevanceFilter(llm, prompt_service=mock_prompt_service)

    two_chunks = [("chunk A1", {"filename": "a.pdf"}), ("chunk A2", {"filename": "a2.pdf"})]
    results, prompt = await relevance_filter.filter_per_subquestion(
        ["What is A?"],
        [two_chunks],
        threshold=7.0,
    )

    assert results == []
    assert prompt != ""
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Test: strict threshold boundary
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
async def test_filter_per_subq_passes_threshold_correctly(mock_prompt_service):
    """The threshold comparison is strict: a score equal to it is dropped, a higher one kept."""
    llm = MagicMock()
    # Scores [7.0, 7.1] against threshold 7.0: only the 7.1 chunk should remain.
    llm.complete = AsyncMock(return_value='{"0": [7.0, 7.1]}')
    relevance_filter = RelevanceFilter(llm, prompt_service=mock_prompt_service)

    boundary_chunks = [
        ("exact threshold", {"filename": "f1.pdf"}),
        ("above threshold", {"filename": "f2.pdf"}),
    ]
    results, prompt = await relevance_filter.filter_per_subquestion(
        ["Boundary test?"],
        [boundary_chunks],
        threshold=7.0,
    )

    assert len(results) == 1
    kept = results[0][1]
    assert len(kept) == 1
    survivor = kept[0]
    assert survivor[0] == "above threshold"
    assert survivor[1]["relevance_score"] == 7.1
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Test: LLM exception
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
async def test_filter_per_subq_llm_exception(mock_prompt_service):
    """LLM call raises an exception → returns ([], prompt) with a non-empty prompt.

    The prompt is expected to be non-empty even though the LLM call fails,
    so it is returned together with the empty result list rather than
    being replaced by an empty string.
    """
    llm = MagicMock()
    # Any awaited call to ``complete`` raises, simulating an unavailable backend.
    llm.complete = AsyncMock(side_effect=RuntimeError("LLM unavailable"))

    rf = RelevanceFilter(llm, prompt_service=mock_prompt_service)
    results, prompt = await rf.filter_per_subquestion(
        ["What is A?"],
        [[("chunk A1", {"filename": "a.pdf"})]],
        threshold=7.0,
    )

    # The exception must be swallowed by the filter, not propagated to the caller.
    assert results == []
    assert prompt != ""
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Test: JSON wrapped in markdown code block
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
async def test_filter_per_subq_json_in_markdown_code_block(mock_prompt_service):
    """Scores wrapped in a fenced ```json block are still extracted and applied."""
    llm = MagicMock()
    llm.complete = AsyncMock(return_value='```json\n{"0": [9.0]}\n```')
    relevance_filter = RelevanceFilter(llm, prompt_service=mock_prompt_service)

    single_chunk = ("chunk A1", {"filename": "a.pdf"})
    results, prompt = await relevance_filter.filter_per_subquestion(
        ["What is A?"],
        [[single_chunk]],
        threshold=7.0,
    )

    assert len(results) == 1
    kept = results[0][1]
    assert len(kept) == 1
    assert kept[0][1]["relevance_score"] == 9.0
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Test: mixed empty and non-empty sub-questions
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
async def test_filter_per_subq_mixed_empty_and_nonempty(mock_prompt_service):
    """A sub-question with chunks is scored while one without chunks stays empty."""
    llm = MagicMock()
    # The mocked reply only carries scores for sub-question 0.
    llm.complete = AsyncMock(return_value='{"0": [8.5]}')
    relevance_filter = RelevanceFilter(llm, prompt_service=mock_prompt_service)

    chunks_per_question = [[("chunk A1", {"filename": "a.pdf"})], []]
    results, prompt = await relevance_filter.filter_per_subquestion(
        ["What is A?", "What is B?"],
        chunks_per_question,
        threshold=7.0,
    )

    assert len(results) == 2
    scored_chunks = results[0][1]
    assert len(scored_chunks) == 1
    assert scored_chunks[0][1]["relevance_score"] == 8.5
    assert results[1][1] == []
|
||||||
|
|
@ -0,0 +1,132 @@
|
||||||
|
"""Phase 4 tests: Per-sub-question retrieval in RAGService.
|
||||||
|
|
||||||
|
Covers:
|
||||||
|
- retrieve_per_subquestion() with multiple sub-questions
|
||||||
|
- retrieve_per_subquestion() with empty input
|
||||||
|
- retrieve_per_subquestion() with single sub-question
|
||||||
|
- Verify retrieve() is called once per sub-question
|
||||||
|
- n_results parameter passthrough
|
||||||
|
- Handling of empty results for individual sub-questions
|
||||||
|
"""
|
||||||
|
import pytest
|
||||||
|
from unittest.mock import MagicMock
|
||||||
|
|
||||||
|
from app.services.rag import RAGService
|
||||||
|
|
||||||
|
|
||||||
|
class TestRetrievePerSubquestion:
    """Exercise RAGService.retrieve_per_subquestion() against mocked storage."""

    @staticmethod
    def _make_service() -> RAGService:
        """Build a RAGService whose client and collection are MagicMocks.

        The mocked client hands back the mocked collection from
        ``get_or_create_collection``; the collection is also assigned to
        ``service._collection`` directly so tests can program its
        ``query`` responses.
        """
        collection = MagicMock()
        client = MagicMock()
        client.get_or_create_collection.return_value = collection

        rag_service = RAGService(chroma_client=client)
        rag_service._collection = collection
        return rag_service

    def test_retrieve_per_subquestion_two_subqs(self):
        """Each of two sub-questions is paired with the chunks from its own query."""
        service = self._make_service()
        response_a = {
            "documents": [["chunk A1", "chunk A2"]],
            "metadatas": [[{"filename": "a.pdf"}, {"filename": "a2.pdf"}]],
            "distances": [[0.1, 0.2]],
        }
        response_b = {
            "documents": [["chunk B1"]],
            "metadatas": [[{"filename": "b.pdf"}]],
            "distances": [[0.3]],
        }
        # Consecutive collection queries return these responses in order.
        service._collection.query.side_effect = [response_a, response_b]

        results = service.retrieve_per_subquestion(
            ["What is A?", "What is B?"], n_results=5
        )

        expected = [
            (
                "What is A?",
                [
                    ("chunk A1", {"filename": "a.pdf"}, 0.1),
                    ("chunk A2", {"filename": "a2.pdf"}, 0.2),
                ],
            ),
            ("What is B?", [("chunk B1", {"filename": "b.pdf"}, 0.3)]),
        ]
        assert len(results) == 2
        for entry, (question, chunks) in zip(results, expected):
            assert entry[0] == question
            assert len(entry[1]) == len(chunks)
            for actual_chunk, expected_chunk in zip(entry[1], chunks):
                assert actual_chunk == expected_chunk

    def test_retrieve_per_subquestion_empty_list(self):
        """No sub-questions in, empty list out."""
        service = self._make_service()
        assert service.retrieve_per_subquestion([], n_results=10) == []

    def test_retrieve_per_subquestion_single_subq(self):
        """A single sub-question produces a one-element result list."""
        service = self._make_service()
        service._collection.query.return_value = {
            "documents": [["chunk X"]],
            "metadatas": [[{"filename": "x.pdf"}]],
            "distances": [[0.05]],
        }

        results = service.retrieve_per_subquestion(["Only question"], n_results=3)

        assert len(results) == 1
        entry = results[0]
        assert entry[0] == "Only question"
        assert len(entry[1]) == 1
        assert entry[1][0] == ("chunk X", {"filename": "x.pdf"}, 0.05)

    def test_retrieve_per_subquestion_calls_retrieve_n_times(self):
        """retrieve() is invoked once per sub-question, each wrapped in its own list."""
        service = self._make_service()
        # Replace retrieve with a spy returning no chunks so only the calls matter.
        service.retrieve = MagicMock(return_value=[])

        sub_questions = ["Q1", "Q2", "Q3"]
        service.retrieve_per_subquestion(sub_questions, n_results=7)

        assert service.retrieve.call_count == 3
        for question in sub_questions:
            service.retrieve.assert_any_call([question], n_results=7)

    def test_retrieve_per_subquestion_preserves_n_results(self):
        """The n_results argument is forwarded unchanged to retrieve()."""
        service = self._make_service()
        retrieve_spy = MagicMock(return_value=[])
        service.retrieve = retrieve_spy

        service.retrieve_per_subquestion(["Q1"], n_results=42)

        retrieve_spy.assert_called_once_with(["Q1"], n_results=42)

    def test_retrieve_per_subquestion_handles_empty_results(self):
        """A sub-question with no hits keeps its slot alongside one that has hits."""
        service = self._make_service()
        found_chunk = ("chunk B", {"filename": "b.pdf"}, 0.2)
        # First retrieve() call yields nothing, the second yields one chunk.
        service.retrieve = MagicMock(side_effect=[[], [found_chunk]])

        results = service.retrieve_per_subquestion(
            ["No results Q", "Has results Q"], n_results=5
        )

        assert len(results) == 2
        without_hits, with_hits = results[0], results[1]

        # The first sub-question is present but carries no chunks.
        assert without_hits[0] == "No results Q"
        assert without_hits[1] == []

        # The second sub-question carries exactly the returned chunk.
        assert with_hits[0] == "Has results Q"
        assert len(with_hits[1]) == 1
        assert with_hits[1][0] == ("chunk B", {"filename": "b.pdf"}, 0.2)
|
||||||
Loading…
Reference in New Issue