feat: add chunk PDF serving endpoint and frontend clickable source links (1.5.6)
- Add page_number and chunk_file_path to SourceMetadata model and query router
- Add GET /chunks/{file_path}/pdf endpoint with path traversal protection
- Add View PDF links in ResponsePanel source cards and ChunkList component
- Update TypeScript types and API helper for chunk PDF URLs
- Add backend tests (5) and frontend ChunkList tests (7)
- Update enhancement plan: all 3 features complete
This commit is contained in:
parent
64043b75a7
commit
d49756f374
|
|
@ -2,7 +2,7 @@
|
||||||
|
|
||||||
**Source**: User request (2026-04-23)
|
**Source**: User request (2026-04-23)
|
||||||
**Scope**: Frontend navigation + RAG Database management page + page-aware chunking with chunk PDFs
|
**Scope**: Frontend navigation + RAG Database management page + page-aware chunking with chunk PDFs
|
||||||
**Status**: 🔄 In Progress — Features 1-2 ✅ Complete, Feature 3 pending
|
**Status**: ✅ Complete — All 3 features implemented (1.5.1–1.5.6)
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|
@ -35,8 +35,8 @@ Enhance the existing Phase 1 application with three features:
|
||||||
- ~~No routing or multi-page support~~ ✅ Done in Feature 1
|
- ~~No routing or multi-page support~~ ✅ Done in Feature 1
|
||||||
- ~~No way to view what's stored in ChromaDB~~ ✅ Backend CRUD done (sub-phase 1.5.2)
|
- ~~No way to view what's stored in ChromaDB~~ ✅ Backend CRUD done (sub-phase 1.5.2)
|
||||||
- ~~No way to delete documents or chunks~~ ✅ Backend CRUD done (sub-phase 1.5.2)
|
- ~~No way to delete documents or chunks~~ ✅ Backend CRUD done (sub-phase 1.5.2)
|
||||||
- No page-level awareness in chunking (all pages flattened before token splitting)
|
- ~~No page-level awareness in chunking (all pages flattened before token splitting)~~ ✅ Done in 1.5.4
|
||||||
- No persistent chunk files (chunks only exist as ChromaDB document text)
|
- ~~No persistent chunk files (chunks only exist as ChromaDB document text)~~ ✅ Done in 1.5.5
|
||||||
- No clickable links in RAG responses to view source chunks
|
- No clickable links in RAG responses to view source chunks
|
||||||
- ~~Upload only via IngestPanel on the main query page~~ (IngestPanel stays on LTT, upload also coming to RAG DB page)
|
- ~~Upload only via IngestPanel on the main query page~~ (IngestPanel stays on LTT, upload also coming to RAG DB page)
|
||||||
|
|
||||||
|
|
@ -211,7 +211,7 @@ class DeleteResponse(BaseModel):
|
||||||
- [x] User can delete individual chunks (with confirmation)
|
- [x] User can delete individual chunks (with confirmation)
|
||||||
- [x] User can upload documents from this page
|
- [x] User can upload documents from this page
|
||||||
- [x] Stats displayed: total documents, total chunks
|
- [x] Stats displayed: total documents, total chunks
|
||||||
- [ ] Uploading a file with existing filename triggers automatic replacement (old data deleted first)
|
- [x] Uploading a file with existing filename triggers automatic replacement (old data deleted first)
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|
@ -493,19 +493,19 @@ legco_reranker/
|
||||||
|
|
||||||
### 3.5 Acceptance Criteria
|
### 3.5 Acceptance Criteria
|
||||||
|
|
||||||
- [ ] PDF uploads produce page-aware chunks: 1 chunk per page with 200-token overlap from adjacent pages
|
- [x] PDF uploads produce page-aware chunks: 1 chunk per page with 200-token overlap from adjacent pages
|
||||||
- [ ] Each page is saved as a separate PDF (original page, not generated text) in `document_chunk/`
|
- [x] Each page is saved as a separate PDF (original page, not generated text) in `document_chunk/`
|
||||||
- [ ] Chunk PDF filename follows convention: `{filename}_page_{n}.pdf`
|
- [x] Chunk PDF filename follows convention: `{filename}_page_{n}.pdf`
|
||||||
- [ ] Page numbers are sequential index (1, 2, 3...), not PDF internal labels
|
- [x] Page numbers are sequential index (1, 2, 3...), not PDF internal labels
|
||||||
- [ ] Oversized pages are kept as single chunks (never split)
|
- [x] Oversized pages are kept as single chunks (never split)
|
||||||
- [ ] `GET /api/v1/chunks/{file_path}/pdf` serves the original chunk PDF
|
- [x] `GET /api/v1/chunks/{file_path}/pdf` serves the original chunk PDF
|
||||||
- [ ] RAG response sources include `page_number` and `chunk_file_path`
|
- [x] RAG response sources include `page_number` and `chunk_file_path`
|
||||||
- [ ] Frontend source cards show page number and clickable link
|
- [x] Frontend source cards show page number and clickable link
|
||||||
- [ ] Clicking source link opens/downloads the original chunk PDF
|
- [x] Clicking source link opens/downloads the original chunk PDF
|
||||||
- [ ] DOCX uploads work without page numbers (graceful degradation, no chunk PDFs)
|
- [x] DOCX uploads work without page numbers (graceful degradation, no chunk PDFs)
|
||||||
- [ ] Uploading a file with same filename replaces existing document (old chunks + PDFs deleted, new document_id)
|
- [x] Uploading a file with same filename replaces existing document (old chunks + PDFs deleted, new document_id)
|
||||||
- [ ] `document_chunk/` is `.gitignore`d
|
- [x] `document_chunk/` is `.gitignore`d
|
||||||
- [ ] Deleting a document also removes its chunk PDFs from `document_chunk/`
|
- [x] Deleting a document also removes its chunk PDFs from `document_chunk/`
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
|
@ -534,7 +534,7 @@ Feature 3 (Page-Aware Chunking) ← Modifies ingestion pipeline
|
||||||
| 1.5.3 | 2 | Frontend RAG Database page | None | RAGDatabasePage, DocumentList, ChunkList, DocumentUpload, API hooks | ✅ Complete |
|
| 1.5.3 | 2 | Frontend RAG Database page | None | RAGDatabasePage, DocumentList, ChunkList, DocumentUpload, API hooks | ✅ Complete |
|
||||||
| 1.5.4 | 3 | Page-aware parsing & chunking | pdf_parser, chunking, metadata enhancements | None | ✅ Complete |
|
| 1.5.4 | 3 | Page-aware parsing & chunking | pdf_parser, chunking, metadata enhancements | None | ✅ Complete |
|
||||||
| 1.5.5 | 3 | Chunk PDF generation & storage | pdf_extractor, config, ingest pipeline refactor | None | ✅ Complete |
|
| 1.5.5 | 3 | Chunk PDF generation & storage | pdf_extractor, config, ingest pipeline refactor | None | ✅ Complete |
|
||||||
| 1.5.6 | 3 | Chunk file serving + frontend links | documents router endpoint | ResponsePanel clickable links, ChunkList updates | 📋 Pending |
|
| 1.5.6 | 3 | Chunk file serving + frontend links | documents router endpoint | ResponsePanel clickable links, ChunkList updates | ✅ Complete |
|
||||||
|
|
||||||
### Parallelization Opportunities
|
### Parallelization Opportunities
|
||||||
|
|
||||||
|
|
@ -619,7 +619,7 @@ None — all resolved.
|
||||||
| `test_phase1_pdf_parser_pages.py` | parse_pdf_by_page() — multi-page PDFs, single-page, empty |
|
| `test_phase1_pdf_parser_pages.py` | parse_pdf_by_page() — multi-page PDFs, single-page, empty |
|
||||||
| `test_phase1_page_aware_chunking.py` | chunk_with_pages() — cross-page chunks, single-page chunks |
|
| `test_phase1_page_aware_chunking.py` | chunk_with_pages() — cross-page chunks, single-page chunks |
|
||||||
| `test_phase1_pdf_extractor.py` | extract_page_as_pdf() — valid page, out-of-range, corrupt PDF |
|
| `test_phase1_pdf_extractor.py` | extract_page_as_pdf() — valid page, out-of-range, corrupt PDF |
|
||||||
| `test_phase1_chunk_serving.py` | GET /chunks/{path}/pdf — valid file, missing file, path traversal |
|
| `test_phase1_chunk_serving.py` | ✅ GET /chunks/{path}/pdf — valid file, missing file, path traversal, filenames with spaces (5 tests, all pass) |
|
||||||
|
|
||||||
### Frontend Tests (New Files)
|
### Frontend Tests (New Files)
|
||||||
|
|
||||||
|
|
@ -628,7 +628,7 @@ None — all resolved.
|
||||||
| `NavBar.test.tsx` | Navigation links, active state |
|
| `NavBar.test.tsx` | Navigation links, active state |
|
||||||
| `RAGDatabasePage.test.tsx` | Document list, delete, upload |
|
| `RAGDatabasePage.test.tsx` | Document list, delete, upload |
|
||||||
| `DocumentList.test.tsx` | Document cards, expand/collapse |
|
| `DocumentList.test.tsx` | Document cards, expand/collapse |
|
||||||
| `ChunkList.test.tsx` | Chunk table, page numbers, PDF links |
|
| `ChunkList.test.tsx` | ✅ Chunk table, page numbers, PDF links (7 tests, all pass) |
|
||||||
|
|
||||||
### Acceptance Tests
|
### Acceptance Tests
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,5 @@
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -6,3 +8,5 @@ class SourceMetadata(BaseModel):
|
||||||
upload_date: str
|
upload_date: str
|
||||||
content_summary: str
|
content_summary: str
|
||||||
chunk_index: int
|
chunk_index: int
|
||||||
|
page_number: Optional[int] = None
|
||||||
|
chunk_file_path: Optional[str] = None
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@ import logging
|
||||||
import os
|
import os
|
||||||
|
|
||||||
from fastapi import APIRouter, HTTPException
|
from fastapi import APIRouter, HTTPException
|
||||||
|
from fastapi.responses import FileResponse
|
||||||
|
|
||||||
from app.models.documents import (
|
from app.models.documents import (
|
||||||
DocumentInfo,
|
DocumentInfo,
|
||||||
|
|
@ -126,3 +127,30 @@ async def delete_chunk(chunk_id: str):
|
||||||
deleted=True,
|
deleted=True,
|
||||||
message=f"Deleted chunk {chunk_id}",
|
message=f"Deleted chunk {chunk_id}",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@router.get("/chunks/{file_path:path}/pdf")
|
||||||
|
async def get_chunk_pdf(file_path: str):
|
||||||
|
"""Serve a chunk PDF file from document_chunk/ directory."""
|
||||||
|
from app.core.config import get_settings
|
||||||
|
|
||||||
|
# Path traversal protection
|
||||||
|
if ".." in file_path:
|
||||||
|
raise HTTPException(status_code=400, detail="Invalid file path")
|
||||||
|
|
||||||
|
settings = get_settings()
|
||||||
|
base_dir = os.path.realpath(settings.document_chunk_path)
|
||||||
|
full_path = os.path.realpath(os.path.join(base_dir, file_path))
|
||||||
|
|
||||||
|
# Ensure resolved path is within base directory
|
||||||
|
if not full_path.startswith(base_dir + os.sep) and full_path != base_dir:
|
||||||
|
raise HTTPException(status_code=400, detail="Invalid file path")
|
||||||
|
|
||||||
|
if not os.path.isfile(full_path):
|
||||||
|
raise HTTPException(status_code=404, detail=f"Chunk file not found: {file_path}")
|
||||||
|
|
||||||
|
return FileResponse(
|
||||||
|
full_path,
|
||||||
|
media_type="application/pdf",
|
||||||
|
filename=os.path.basename(full_path),
|
||||||
|
)
|
||||||
|
|
|
||||||
|
|
@ -59,6 +59,8 @@ async def query(request: QueryRequest):
|
||||||
upload_date=meta.get("upload_date", ""),
|
upload_date=meta.get("upload_date", ""),
|
||||||
content_summary=meta.get("content_summary", ""),
|
content_summary=meta.get("content_summary", ""),
|
||||||
chunk_index=meta.get("chunk_index", 0),
|
chunk_index=meta.get("chunk_index", 0),
|
||||||
|
page_number=meta.get("page_number"),
|
||||||
|
chunk_file_path=meta.get("chunk_file_path"),
|
||||||
)
|
)
|
||||||
for meta in chunk_metadata
|
for meta in chunk_metadata
|
||||||
]
|
]
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,73 @@
|
||||||
|
"""Tests for chunk PDF file serving endpoint.
|
||||||
|
|
||||||
|
Coverage:
|
||||||
|
- GET /api/v1/chunks/{file_path}/pdf — success, 404, path traversal 400
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
import tempfile
|
||||||
|
import unittest
|
||||||
|
from unittest.mock import patch
|
||||||
|
|
||||||
|
from fastapi.testclient import TestClient
|
||||||
|
|
||||||
|
from app.main import app
|
||||||
|
|
||||||
|
|
||||||
|
class TestChunkServing(unittest.TestCase):
|
||||||
|
"""Test GET /api/v1/chunks/{file_path}/pdf endpoint."""
|
||||||
|
|
||||||
|
def setUp(self):
|
||||||
|
self.client = TestClient(app)
|
||||||
|
|
||||||
|
def test_get_chunk_pdf_success(self):
|
||||||
|
"""Should serve chunk PDF file with 200 and application/pdf."""
|
||||||
|
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||||
|
test_file = os.path.join(tmp_dir, "test_page_1.pdf")
|
||||||
|
with open(test_file, "wb") as f:
|
||||||
|
f.write(b"%PDF-1.4 fake content")
|
||||||
|
|
||||||
|
with patch("app.core.config.get_settings") as mock_settings:
|
||||||
|
mock_settings.return_value.document_chunk_path = tmp_dir
|
||||||
|
response = self.client.get("/api/v1/chunks/test_page_1.pdf/pdf")
|
||||||
|
|
||||||
|
self.assertEqual(response.status_code, 200)
|
||||||
|
self.assertIn("application/pdf", response.headers["content-type"])
|
||||||
|
|
||||||
|
def test_get_chunk_pdf_not_found(self):
|
||||||
|
"""Should return 404 for non-existent chunk file."""
|
||||||
|
with patch("app.core.config.get_settings") as mock_settings:
|
||||||
|
mock_settings.return_value.document_chunk_path = "/tmp/nonexistent_chunk_dir"
|
||||||
|
response = self.client.get("/api/v1/chunks/nonexistent.pdf/pdf")
|
||||||
|
|
||||||
|
self.assertEqual(response.status_code, 404)
|
||||||
|
|
||||||
|
def test_get_chunk_pdf_path_traversal_double_dot(self):
|
||||||
|
"""Should reject path traversal with .. (404 due to Starlette normalization)."""
|
||||||
|
with patch("app.core.config.get_settings") as mock_settings:
|
||||||
|
mock_settings.return_value.document_chunk_path = "/tmp/fake_chunk_dir"
|
||||||
|
response = self.client.get("/api/v1/chunks/../etc/passwd/pdf")
|
||||||
|
|
||||||
|
self.assertIn(response.status_code, [400, 404])
|
||||||
|
|
||||||
|
def test_get_chunk_pdf_path_traversal_symlink_escape(self):
|
||||||
|
"""Should reject resolved path escaping base directory (404 from normalization)."""
|
||||||
|
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||||
|
with patch("app.core.config.get_settings") as mock_settings:
|
||||||
|
mock_settings.return_value.document_chunk_path = tmp_dir
|
||||||
|
response = self.client.get("/api/v1/chunks/../../etc/passwd/pdf")
|
||||||
|
|
||||||
|
self.assertIn(response.status_code, [400, 404])
|
||||||
|
|
||||||
|
def test_get_chunk_pdf_with_spaces_in_filename(self):
|
||||||
|
"""Should serve files with spaces in the filename."""
|
||||||
|
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||||
|
test_file = os.path.join(tmp_dir, "NEC4 ACC_page_3.pdf")
|
||||||
|
with open(test_file, "wb") as f:
|
||||||
|
f.write(b"%PDF-1.4 fake content")
|
||||||
|
|
||||||
|
with patch("app.core.config.get_settings") as mock_settings:
|
||||||
|
mock_settings.return_value.document_chunk_path = tmp_dir
|
||||||
|
response = self.client.get("/api/v1/chunks/NEC4 ACC_page_3.pdf/pdf")
|
||||||
|
|
||||||
|
self.assertEqual(response.status_code, 200)
|
||||||
|
self.assertIn("application/pdf", response.headers["content-type"])
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
import React from 'react'
|
import React from 'react'
|
||||||
import { Trash2 } from 'lucide-react'
|
import { Trash2 } from 'lucide-react'
|
||||||
import type { ChunkInfo } from '../types'
|
import type { ChunkInfo } from '../types'
|
||||||
|
import { getChunkPdfUrl } from '../lib/api'
|
||||||
|
|
||||||
interface ChunkListProps {
|
interface ChunkListProps {
|
||||||
chunks: ChunkInfo[]
|
chunks: ChunkInfo[]
|
||||||
|
|
@ -64,6 +65,17 @@ export const ChunkList: React.FC<ChunkListProps> = ({
|
||||||
? `${chunk.content_summary.slice(0, 100)}...`
|
? `${chunk.content_summary.slice(0, 100)}...`
|
||||||
: chunk.content_summary}
|
: chunk.content_summary}
|
||||||
</div>
|
</div>
|
||||||
|
{chunk.chunk_file_path && (
|
||||||
|
<a
|
||||||
|
href={getChunkPdfUrl(chunk.chunk_file_path)}
|
||||||
|
target="_blank"
|
||||||
|
rel="noopener noreferrer"
|
||||||
|
className="inline-flex items-center mt-1 text-xs text-blue-600 hover:text-blue-800 hover:underline"
|
||||||
|
data-testid="view-chunk-pdf-link"
|
||||||
|
>
|
||||||
|
View PDF
|
||||||
|
</a>
|
||||||
|
)}
|
||||||
</div>
|
</div>
|
||||||
<button
|
<button
|
||||||
data-testid="delete-chunk-btn"
|
data-testid="delete-chunk-btn"
|
||||||
|
|
|
||||||
|
|
@ -2,6 +2,7 @@ import React, { useState } from 'react'
|
||||||
import { MessageSquare, AlertCircle, Copy, ChevronDown, ChevronRight } from 'lucide-react'
|
import { MessageSquare, AlertCircle, Copy, ChevronDown, ChevronRight } from 'lucide-react'
|
||||||
import ReactMarkdown from 'react-markdown'
|
import ReactMarkdown from 'react-markdown'
|
||||||
import type { SourceMetadata } from '../types'
|
import type { SourceMetadata } from '../types'
|
||||||
|
import { getChunkPdfUrl } from '../lib/api'
|
||||||
|
|
||||||
interface ResponsePanelProps {
|
interface ResponsePanelProps {
|
||||||
answer: string | null
|
answer: string | null
|
||||||
|
|
@ -133,10 +134,30 @@ export const ResponsePanel: React.FC<ResponsePanelProps> = ({
|
||||||
key={index}
|
key={index}
|
||||||
className="border border-gray-200 rounded p-3 bg-gray-50"
|
className="border border-gray-200 rounded p-3 bg-gray-50"
|
||||||
>
|
>
|
||||||
|
<div className="flex items-center justify-between">
|
||||||
<div className="font-medium text-sm">{source.filename}</div>
|
<div className="font-medium text-sm">{source.filename}</div>
|
||||||
|
{source.page_number !== null && (
|
||||||
|
<span className="text-xs text-blue-600 font-medium">
|
||||||
|
Page {source.page_number}
|
||||||
|
</span>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
<div className="text-sm text-gray-500">{source.upload_date}</div>
|
<div className="text-sm text-gray-500">{source.upload_date}</div>
|
||||||
<div className="text-sm text-gray-600 mt-1">{source.content_summary}</div>
|
<div className="text-sm text-gray-600 mt-1">{source.content_summary}</div>
|
||||||
<div className="text-xs text-gray-400 mt-1">Chunk {source.chunk_index}</div>
|
<div className="flex items-center justify-between mt-1">
|
||||||
|
<div className="text-xs text-gray-400">Chunk {source.chunk_index}</div>
|
||||||
|
{source.chunk_file_path && (
|
||||||
|
<a
|
||||||
|
href={getChunkPdfUrl(source.chunk_file_path)}
|
||||||
|
target="_blank"
|
||||||
|
rel="noopener noreferrer"
|
||||||
|
className="text-xs text-blue-600 hover:text-blue-800 hover:underline"
|
||||||
|
data-testid="view-chunk-pdf-link"
|
||||||
|
>
|
||||||
|
View PDF
|
||||||
|
</a>
|
||||||
|
)}
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
))}
|
))}
|
||||||
</div>
|
</div>
|
||||||
|
|
|
||||||
|
|
@ -38,3 +38,8 @@ export const deleteChunk = async (chunkId: string): Promise<DeleteResponse> => {
|
||||||
const resp = await apiClient.delete<DeleteResponse>(`/chunks/${chunkId}`)
|
const resp = await apiClient.delete<DeleteResponse>(`/chunks/${chunkId}`)
|
||||||
return resp.data
|
return resp.data
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export const getChunkPdfUrl = (filePath: string): string => {
|
||||||
|
const baseUrl: string = import.meta.env.VITE_API_BASE_URL ?? 'http://localhost:8000/api/v1'
|
||||||
|
return `${baseUrl}/chunks/${filePath}/pdf`
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,136 @@
|
||||||
|
import { describe, it, expect, vi } from 'vitest'
|
||||||
|
import { render, screen, fireEvent } from '@testing-library/react'
|
||||||
|
import { ChunkList } from '../../components/ChunkList'
|
||||||
|
import type { ChunkInfo } from '../../types'
|
||||||
|
|
||||||
|
vi.mock('../../lib/api', () => ({
|
||||||
|
getChunkPdfUrl: (path: string) => `http://localhost:8000/api/v1/chunks/${path}/pdf`,
|
||||||
|
}))
|
||||||
|
|
||||||
|
const mockChunks: ChunkInfo[] = [
|
||||||
|
{
|
||||||
|
chunk_id: 'doc1_0',
|
||||||
|
chunk_index: 0,
|
||||||
|
content_summary: 'Summary of chunk 0 content...',
|
||||||
|
page_number: 1,
|
||||||
|
chunk_file_path: 'test_page_1.pdf',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
chunk_id: 'doc1_1',
|
||||||
|
chunk_index: 1,
|
||||||
|
content_summary: 'Summary of chunk 1 content that is quite long and should be truncated when displayed in the component',
|
||||||
|
page_number: 2,
|
||||||
|
chunk_file_path: 'test_page_2.pdf',
|
||||||
|
},
|
||||||
|
{
|
||||||
|
chunk_id: 'doc1_2',
|
||||||
|
chunk_index: 2,
|
||||||
|
content_summary: 'A DOCX chunk without page info',
|
||||||
|
page_number: null,
|
||||||
|
chunk_file_path: null,
|
||||||
|
},
|
||||||
|
]
|
||||||
|
|
||||||
|
describe('ChunkList', () => {
|
||||||
|
it('renders chunk rows with chunk index and page number', () => {
|
||||||
|
render(
|
||||||
|
<ChunkList
|
||||||
|
chunks={mockChunks}
|
||||||
|
isLoading={false}
|
||||||
|
onDeleteChunk={vi.fn()}
|
||||||
|
isDeleting={false}
|
||||||
|
/>
|
||||||
|
)
|
||||||
|
|
||||||
|
expect(screen.getByText(/Chunk 0/)).toBeInTheDocument()
|
||||||
|
expect(screen.getByText(/Chunk 1/)).toBeInTheDocument()
|
||||||
|
expect(screen.getByText(/Chunk 2/)).toBeInTheDocument()
|
||||||
|
expect(screen.getByText(/Page: 1/)).toBeInTheDocument()
|
||||||
|
expect(screen.getByText(/Page: 2/)).toBeInTheDocument()
|
||||||
|
expect(screen.getByText(/Page: N\/A/)).toBeInTheDocument()
|
||||||
|
})
|
||||||
|
|
||||||
|
it('renders View PDF links for chunks with chunk_file_path', () => {
|
||||||
|
render(
|
||||||
|
<ChunkList
|
||||||
|
chunks={mockChunks}
|
||||||
|
isLoading={false}
|
||||||
|
onDeleteChunk={vi.fn()}
|
||||||
|
isDeleting={false}
|
||||||
|
/>
|
||||||
|
)
|
||||||
|
|
||||||
|
const links = screen.getAllByTestId('view-chunk-pdf-link')
|
||||||
|
expect(links).toHaveLength(2)
|
||||||
|
expect(links[0]).toHaveAttribute('href', 'http://localhost:8000/api/v1/chunks/test_page_1.pdf/pdf')
|
||||||
|
expect(links[0]).toHaveAttribute('target', '_blank')
|
||||||
|
})
|
||||||
|
|
||||||
|
it('does not render View PDF link for chunks without chunk_file_path', () => {
|
||||||
|
const docxChunk = [mockChunks[2]]
|
||||||
|
render(
|
||||||
|
<ChunkList
|
||||||
|
chunks={docxChunk}
|
||||||
|
isLoading={false}
|
||||||
|
onDeleteChunk={vi.fn()}
|
||||||
|
isDeleting={false}
|
||||||
|
/>
|
||||||
|
)
|
||||||
|
|
||||||
|
expect(screen.queryByTestId('view-chunk-pdf-link')).not.toBeInTheDocument()
|
||||||
|
})
|
||||||
|
|
||||||
|
it('calls onDeleteChunk when delete button is clicked', () => {
|
||||||
|
const onDelete = vi.fn()
|
||||||
|
render(
|
||||||
|
<ChunkList
|
||||||
|
chunks={[mockChunks[0]]}
|
||||||
|
isLoading={false}
|
||||||
|
onDeleteChunk={onDelete}
|
||||||
|
isDeleting={false}
|
||||||
|
/>
|
||||||
|
)
|
||||||
|
|
||||||
|
fireEvent.click(screen.getByTestId('delete-chunk-btn'))
|
||||||
|
expect(onDelete).toHaveBeenCalledWith('doc1_0')
|
||||||
|
})
|
||||||
|
|
||||||
|
it('shows loading skeletons when isLoading is true', () => {
|
||||||
|
render(
|
||||||
|
<ChunkList
|
||||||
|
chunks={[]}
|
||||||
|
isLoading={true}
|
||||||
|
onDeleteChunk={vi.fn()}
|
||||||
|
isDeleting={false}
|
||||||
|
/>
|
||||||
|
)
|
||||||
|
|
||||||
|
expect(screen.getAllByTestId('skeleton-line').length).toBeGreaterThan(0)
|
||||||
|
})
|
||||||
|
|
||||||
|
it('shows empty message when no chunks', () => {
|
||||||
|
render(
|
||||||
|
<ChunkList
|
||||||
|
chunks={[]}
|
||||||
|
isLoading={false}
|
||||||
|
onDeleteChunk={vi.fn()}
|
||||||
|
isDeleting={false}
|
||||||
|
/>
|
||||||
|
)
|
||||||
|
|
||||||
|
expect(screen.getByText('No chunks found')).toBeInTheDocument()
|
||||||
|
})
|
||||||
|
|
||||||
|
it('disables delete button when isDeleting is true', () => {
|
||||||
|
render(
|
||||||
|
<ChunkList
|
||||||
|
chunks={[mockChunks[0]]}
|
||||||
|
isLoading={false}
|
||||||
|
onDeleteChunk={vi.fn()}
|
||||||
|
isDeleting={true}
|
||||||
|
/>
|
||||||
|
)
|
||||||
|
|
||||||
|
expect(screen.getByTestId('delete-chunk-btn')).toBeDisabled()
|
||||||
|
})
|
||||||
|
})
|
||||||
|
|
@ -10,12 +10,16 @@ describe('ResponsePanel', () => {
|
||||||
upload_date: '2024-01-15',
|
upload_date: '2024-01-15',
|
||||||
content_summary: 'Introduction to RAG systems',
|
content_summary: 'Introduction to RAG systems',
|
||||||
chunk_index: 0,
|
chunk_index: 0,
|
||||||
|
page_number: 1,
|
||||||
|
chunk_file_path: 'test_chunk_1.pdf',
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
filename: 'document2.txt',
|
filename: 'document2.txt',
|
||||||
upload_date: '2024-01-16',
|
upload_date: '2024-01-16',
|
||||||
content_summary: 'Advanced retrieval techniques',
|
content_summary: 'Advanced retrieval techniques',
|
||||||
chunk_index: 1,
|
chunk_index: 1,
|
||||||
|
page_number: null,
|
||||||
|
chunk_file_path: null,
|
||||||
},
|
},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -23,6 +23,8 @@ const mockQueryResponse: QueryResponse = {
|
||||||
upload_date: '2024-01-15',
|
upload_date: '2024-01-15',
|
||||||
content_summary: 'Meeting minutes',
|
content_summary: 'Meeting minutes',
|
||||||
chunk_index: 0,
|
chunk_index: 0,
|
||||||
|
page_number: 1,
|
||||||
|
chunk_file_path: 'test_chunk.pdf',
|
||||||
},
|
},
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,8 @@ export interface SourceMetadata {
|
||||||
upload_date: string
|
upload_date: string
|
||||||
content_summary: string
|
content_summary: string
|
||||||
chunk_index: number
|
chunk_index: number
|
||||||
|
page_number: number | null
|
||||||
|
chunk_file_path: string | null
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface QueryRequest {
|
export interface QueryRequest {
|
||||||
|
|
|
||||||
Binary file not shown.
Loading…
Reference in New Issue