fix(relevance): tolerate LLM score count mismatches via padding instead of discarding
The per-sub-question filter was all-or-nothing: if the LLM returned 9 scores for 10 chunks (common with qwen3.5-35b), every chunk was discarded and the user got 'no relevant information found'. Now a score count mismatch is repaired instead: fewer scores than chunks → pad the missing scores with 0.0; more scores than chunks → truncate to the expected count. The log level for a count mismatch is downgraded from error to warning, since the condition is now recoverable. A sub-question with no scores at all is still logged as an error and yields an empty result. Also improve the LTT page UI: sources are collapsed by default in per-sub-question sections, and the 'Your question' text now wraps to show the full question instead of being truncated. Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent) Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
parent
2656f9ca08
commit
a7a22f1494
|
|
@ -212,15 +212,23 @@ class RelevanceFilter:
|
|||
key = str(idx)
|
||||
if len(sub_chunks[idx]) == 0:
|
||||
continue
|
||||
if key not in score_map or len(score_map[key]) != len(sub_chunks[idx]):
|
||||
logger.error(
|
||||
"RelevanceFilter per-subq score count mismatch for sub-q %d: "
|
||||
"expected %d scores, got %d",
|
||||
idx, len(sub_chunks[idx]),
|
||||
len(score_map.get(key, [])),
|
||||
)
|
||||
if key not in score_map:
|
||||
logger.error("RelevanceFilter per-subq: no scores for sub-q %d", idx)
|
||||
return [], prompt
|
||||
|
||||
expected = len(sub_chunks[idx])
|
||||
actual = len(score_map[key])
|
||||
if actual != expected:
|
||||
logger.warning(
|
||||
"RelevanceFilter per-subq score count mismatch for sub-q %d: "
|
||||
"expected %d scores, got %d — padding with 0.0",
|
||||
idx, expected, actual,
|
||||
)
|
||||
if actual < expected:
|
||||
score_map[key].extend([0.0] * (expected - actual))
|
||||
else:
|
||||
score_map[key] = score_map[key][:expected]
|
||||
|
||||
filtered_results: List[Tuple[str, List[Tuple[str, Dict]]]] = []
|
||||
for idx, (sq, chunks) in enumerate(zip(sub_questions, sub_chunks)):
|
||||
scores = score_map.get(str(idx), [])
|
||||
|
|
|
|||
|
|
@ -170,7 +170,7 @@ async def test_filter_per_subq_llm_returns_invalid_json(tmp_path):
|
|||
# Test: score count mismatch
|
||||
# ---------------------------------------------------------------------------
|
||||
async def test_filter_per_subq_score_count_mismatch(tmp_path):
|
||||
"""Sub-q 0 has 2 chunks but LLM returns only 1 score → returns ([], prompt)."""
|
||||
"""Sub-q 0 has 2 chunks but LLM returns only 1 score — pads with 0.0, keeps high-scored chunk."""
|
||||
from app.services.relevance_filter import RelevanceFilter
|
||||
|
||||
llm = _MockLLM(response='{"0": [8.5]}')
|
||||
|
|
@ -183,7 +183,12 @@ async def test_filter_per_subq_score_count_mismatch(tmp_path):
|
|||
threshold=7.0,
|
||||
)
|
||||
|
||||
assert results == []
|
||||
assert len(results) == 1
|
||||
sq, chunks = results[0]
|
||||
assert sq == "What is A?"
|
||||
assert len(chunks) == 1
|
||||
assert chunks[0][0] == "chunk A1"
|
||||
assert chunks[0][1]["relevance_score"] == 8.5
|
||||
assert prompt != ""
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -55,7 +55,7 @@ export const QueryInput: React.FC<QueryInputProps> = ({ onSubmit, isLoading }) =
|
|||
{isLoading ? 'Processing...' : 'Submit'}
|
||||
</button>
|
||||
{submittedQuestion && (
|
||||
<p data-testid="submitted-question" className="text-sm text-gray-500 italic truncate">
|
||||
<p data-testid="submitted-question" className="text-sm text-gray-500 italic break-words">
|
||||
Your question: “{submittedQuestion}”
|
||||
</p>
|
||||
)}
|
||||
|
|
|
|||
|
|
@ -74,7 +74,7 @@ function SubQuestionSection({
|
|||
subQuestion: SubQuestionSources
|
||||
answerSection: string
|
||||
}) {
|
||||
const [expanded, setExpanded] = useState(true)
|
||||
const [expanded, setExpanded] = useState(false)
|
||||
const processedAnswer = processCitationsForSubq(answerSection, [subQuestion], 0)
|
||||
|
||||
return (
|
||||
|
|
|
|||
|
|
@ -80,13 +80,14 @@ describe('ResponsePanel — per-sub-question rendering (Phase 4)', () => {
|
|||
const toggles = screen.getAllByTestId('sources-toggle')
|
||||
expect(toggles).toHaveLength(2)
|
||||
expect(toggles[0]).toHaveTextContent('Sources (1)')
|
||||
expect(screen.getAllByTestId('sources-container')).toHaveLength(2)
|
||||
|
||||
fireEvent.click(toggles[1])
|
||||
const sourceCards = screen.getAllByTestId('sources-container')
|
||||
expect(sourceCards).toHaveLength(1)
|
||||
// Default: both collapsed (hidden)
|
||||
expect(screen.queryAllByTestId('sources-container')).toHaveLength(0)
|
||||
|
||||
// Click first toggle to expand
|
||||
fireEvent.click(toggles[0])
|
||||
expect(screen.getAllByTestId('sources-container')).toHaveLength(1)
|
||||
expect(screen.getByText(/Page 3/)).toBeInTheDocument()
|
||||
expect(screen.queryByText(/Page 7/)).not.toBeInTheDocument()
|
||||
})
|
||||
|
||||
it('falls back to flat rendering when subQuestionSources is null', () => {
|
||||
|
|
|
|||
Loading…
Reference in New Issue