fix(relevance): tolerate LLM score count mismatches via padding/truncation instead of discarding
The per-sub-question filter was all-or-nothing: if the LLM returned 9 scores for 10 chunks (common with qwen3.5-35b), every chunk was discarded and the user got 'no relevant information found'. Now a count mismatch is tolerated: fewer scores → pad the missing ones with 0.0; more scores → truncate the extras. The log level for a mismatch changed from error to warning since the case is recoverable; a sub-question with no scores at all still logs an error and returns no results. Also improve the LTT page UI: sources are collapsed by default in per-sub-q sections, and the 'Your question' text now wraps to show the full question instead of being truncated. Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent) Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
parent
2656f9ca08
commit
a7a22f1494
|
|
@ -212,15 +212,23 @@ class RelevanceFilter:
|
||||||
key = str(idx)
|
key = str(idx)
|
||||||
if len(sub_chunks[idx]) == 0:
|
if len(sub_chunks[idx]) == 0:
|
||||||
continue
|
continue
|
||||||
if key not in score_map or len(score_map[key]) != len(sub_chunks[idx]):
|
if key not in score_map:
|
||||||
logger.error(
|
logger.error("RelevanceFilter per-subq: no scores for sub-q %d", idx)
|
||||||
"RelevanceFilter per-subq score count mismatch for sub-q %d: "
|
|
||||||
"expected %d scores, got %d",
|
|
||||||
idx, len(sub_chunks[idx]),
|
|
||||||
len(score_map.get(key, [])),
|
|
||||||
)
|
|
||||||
return [], prompt
|
return [], prompt
|
||||||
|
|
||||||
|
expected = len(sub_chunks[idx])
|
||||||
|
actual = len(score_map[key])
|
||||||
|
if actual != expected:
|
||||||
|
logger.warning(
|
||||||
|
"RelevanceFilter per-subq score count mismatch for sub-q %d: "
|
||||||
|
"expected %d scores, got %d — padding with 0.0",
|
||||||
|
idx, expected, actual,
|
||||||
|
)
|
||||||
|
if actual < expected:
|
||||||
|
score_map[key].extend([0.0] * (expected - actual))
|
||||||
|
else:
|
||||||
|
score_map[key] = score_map[key][:expected]
|
||||||
|
|
||||||
filtered_results: List[Tuple[str, List[Tuple[str, Dict]]]] = []
|
filtered_results: List[Tuple[str, List[Tuple[str, Dict]]]] = []
|
||||||
for idx, (sq, chunks) in enumerate(zip(sub_questions, sub_chunks)):
|
for idx, (sq, chunks) in enumerate(zip(sub_questions, sub_chunks)):
|
||||||
scores = score_map.get(str(idx), [])
|
scores = score_map.get(str(idx), [])
|
||||||
|
|
|
||||||
|
|
@ -170,7 +170,7 @@ async def test_filter_per_subq_llm_returns_invalid_json(tmp_path):
|
||||||
# Test: score count mismatch
|
# Test: score count mismatch
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
async def test_filter_per_subq_score_count_mismatch(tmp_path):
|
async def test_filter_per_subq_score_count_mismatch(tmp_path):
|
||||||
"""Sub-q 0 has 2 chunks but LLM returns only 1 score → returns ([], prompt)."""
|
"""Sub-q 0 has 2 chunks but LLM returns only 1 score — pads with 0.0, keeps high-scored chunk."""
|
||||||
from app.services.relevance_filter import RelevanceFilter
|
from app.services.relevance_filter import RelevanceFilter
|
||||||
|
|
||||||
llm = _MockLLM(response='{"0": [8.5]}')
|
llm = _MockLLM(response='{"0": [8.5]}')
|
||||||
|
|
@ -183,7 +183,12 @@ async def test_filter_per_subq_score_count_mismatch(tmp_path):
|
||||||
threshold=7.0,
|
threshold=7.0,
|
||||||
)
|
)
|
||||||
|
|
||||||
assert results == []
|
assert len(results) == 1
|
||||||
|
sq, chunks = results[0]
|
||||||
|
assert sq == "What is A?"
|
||||||
|
assert len(chunks) == 1
|
||||||
|
assert chunks[0][0] == "chunk A1"
|
||||||
|
assert chunks[0][1]["relevance_score"] == 8.5
|
||||||
assert prompt != ""
|
assert prompt != ""
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -55,7 +55,7 @@ export const QueryInput: React.FC<QueryInputProps> = ({ onSubmit, isLoading }) =
|
||||||
{isLoading ? 'Processing...' : 'Submit'}
|
{isLoading ? 'Processing...' : 'Submit'}
|
||||||
</button>
|
</button>
|
||||||
{submittedQuestion && (
|
{submittedQuestion && (
|
||||||
<p data-testid="submitted-question" className="text-sm text-gray-500 italic truncate">
|
<p data-testid="submitted-question" className="text-sm text-gray-500 italic break-words">
|
||||||
Your question: “{submittedQuestion}”
|
Your question: “{submittedQuestion}”
|
||||||
</p>
|
</p>
|
||||||
)}
|
)}
|
||||||
|
|
|
||||||
|
|
@ -74,7 +74,7 @@ function SubQuestionSection({
|
||||||
subQuestion: SubQuestionSources
|
subQuestion: SubQuestionSources
|
||||||
answerSection: string
|
answerSection: string
|
||||||
}) {
|
}) {
|
||||||
const [expanded, setExpanded] = useState(true)
|
const [expanded, setExpanded] = useState(false)
|
||||||
const processedAnswer = processCitationsForSubq(answerSection, [subQuestion], 0)
|
const processedAnswer = processCitationsForSubq(answerSection, [subQuestion], 0)
|
||||||
|
|
||||||
return (
|
return (
|
||||||
|
|
|
||||||
|
|
@ -80,13 +80,14 @@ describe('ResponsePanel — per-sub-question rendering (Phase 4)', () => {
|
||||||
const toggles = screen.getAllByTestId('sources-toggle')
|
const toggles = screen.getAllByTestId('sources-toggle')
|
||||||
expect(toggles).toHaveLength(2)
|
expect(toggles).toHaveLength(2)
|
||||||
expect(toggles[0]).toHaveTextContent('Sources (1)')
|
expect(toggles[0]).toHaveTextContent('Sources (1)')
|
||||||
expect(screen.getAllByTestId('sources-container')).toHaveLength(2)
|
|
||||||
|
|
||||||
fireEvent.click(toggles[1])
|
// Default: both collapsed (hidden)
|
||||||
const sourceCards = screen.getAllByTestId('sources-container')
|
expect(screen.queryAllByTestId('sources-container')).toHaveLength(0)
|
||||||
expect(sourceCards).toHaveLength(1)
|
|
||||||
|
// Click first toggle to expand
|
||||||
|
fireEvent.click(toggles[0])
|
||||||
|
expect(screen.getAllByTestId('sources-container')).toHaveLength(1)
|
||||||
expect(screen.getByText(/Page 3/)).toBeInTheDocument()
|
expect(screen.getByText(/Page 3/)).toBeInTheDocument()
|
||||||
expect(screen.queryByText(/Page 7/)).not.toBeInTheDocument()
|
|
||||||
})
|
})
|
||||||
|
|
||||||
it('falls back to flat rendering when subQuestionSources is null', () => {
|
it('falls back to flat rendering when subQuestionSources is null', () => {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue