// Helpers that turn bracketed source citations inside answer markdown into
// numbered links, plus two small markdown formatting utilities.
//
// NOTE(review): this copy of the file looks like it went through a tag-stripping
// step. Map and Set appear without type arguments, and three regex literals that
// begin with an open-paren question-mark are cut off together with the code that
// followed them. Each affected spot is marked below. Restore from version control
// before relying on this file; nothing here guesses at the missing text.
import type { SourceMetadata, SubQuestionSources } from '../types'
import { getPdfViewerUrl } from '../lib/api'

/**
 * Converts plain paragraphs into a markdown bullet list.
 *
 * - Whitespace-only input yields an empty string.
 * - If any line already looks like a list item, the text is returned unchanged.
 * - Otherwise the text is split on runs of two or more newlines, blank pieces are
 *   dropped, and each remaining paragraph becomes one dash bullet with its
 *   internal newlines replaced by spaces. Bullets are joined with single newlines.
 *
 * NOTE(review): the list-item test accepts a leading dash, asterisk or plus with
 * no whitespace required after it, so a line that starts with bold markup or a
 * horizontal rule also counts as already bulleted. Confirm that is intended.
 *
 * @param text Markdown text to normalise.
 * @returns The bulletised text, the original text, or an empty string.
 */
export function bulletizeMarkdown(text: string): string {
  if (!text.trim()) return ''
  const lines = text.split('\n')
  // True when at least one line starts with a bullet marker, or with digits
  // followed by a dot or closing paren and one whitespace character.
  const hasBullets = lines.some(
    (line) => /^(\s*[-*+]|\s*\d+[.)]\s)/.test(line.trimStart())
  )
  if (hasBullets) return text
  // Blank-line separated paragraphs; whitespace-only pieces are discarded.
  const paragraphs = text.split(/\n{2,}/).filter((p) => p.trim())
  return paragraphs.map((p) => `- ${p.replace(/\n/g, ' ').trim()}`).join('\n')
}

// Trailing file extensions removed when matching citations, case-insensitive.
const SUPPORTED_EXTENSIONS = /\.(pdf|docx|txt)$/i

/**
 * Removes a trailing pdf, docx or txt extension from a filename and trims the
 * result. The pattern is anchored at the end of the string, so an extension that
 * is followed by any other text is left in place.
 */
function stripExtension(filename: string): string {
  return filename.replace(SUPPORTED_EXTENSIONS, '').trim()
}

/**
 * Builds a lookup from lowercased citation text to the source it refers to.
 *
 * Up to four keys are registered per source:
 * the trimmed filename, the filename followed by a comma and page number, and the
 * same two forms with the extension stripped (only when stripping changed the name).
 * The page forms are added only when page_number is not null.
 * When two sources produce the same key, the later one in the array wins.
 *
 * NOTE(review): the return type and the constructor carry no type arguments here;
 * they were presumably string keys and SourceMetadata values. Confirm against the
 * original file.
 *
 * @param sources Sources to index.
 * @returns The populated lookup map.
 */
function buildCitationLookup(sources: SourceMetadata[]): Map {
  const lookup = new Map()
  for (const source of sources) {
    const fname = source.filename.trim()
    lookup.set(fname.toLowerCase(), source)
    if (source.page_number !== null) {
      lookup.set(`${fname}, page ${source.page_number}`.toLowerCase(), source)
    }
    // Also register the extension-less spelling so citations may omit it.
    const stripped = stripExtension(fname)
    if (stripped !== fname) {
      lookup.set(stripped.toLowerCase(), source)
      if (source.page_number !== null) {
        lookup.set(`${stripped}, page ${source.page_number}`.toLowerCase(), source)
      }
    }
  }
  return lookup
}

/**
 * Builds the citation lookup for a single sub-question.
 *
 * @param subQuestionSources Per-sub-question source lists.
 * @param subqIndex Index into subQuestionSources. A missing entry yields an
 *   empty lookup rather than an error.
 * @returns Lookup built from that entry's sources.
 */
export function buildCitationLookupForSubq(
  subQuestionSources: SubQuestionSources[],
  subqIndex: number
): Map {
  const sources = subQuestionSources[subqIndex]?.sources ?? []
  return buildCitationLookup(sources)
}

/**
 * Rewrites citations in one answer section using only the sources that belong to
 * the given sub-question.
 *
 * @param answerSection Markdown for the section.
 * @param subQuestionSources Per-sub-question source lists.
 * @param subqIndex Index of the sub-question whose sources apply.
 * @param highlightKeys Optional set of keys that is passed through to
 *   replaceCitationPatterns, which uses it to decide whether a highlight link
 *   can be produced.
 * @returns The section with resolvable citations replaced by numbered links.
 */
export function processCitationsForSubq(
  answerSection: string,
  subQuestionSources: SubQuestionSources[],
  subqIndex: number,
  highlightKeys?: Set
): string {
  const lookup = buildCitationLookupForSubq(subQuestionSources, subqIndex)
  return replaceCitationPatterns(answerSection, lookup, highlightKeys)
}

/**
 * Chooses the link target for a source, in priority order:
 *
 * 1. The v2 highlights endpoint, when highlightReady is truthy and the source has
 *    both document_id and sub_question_text. The base comes from the
 *    VITE_API_BASE_URL environment value, falling back to the api v1 path, with
 *    a v2 segment appended.
 * 2. The PDF viewer URL from getPdfViewerUrl, when chunk_file_path is set. A null
 *    page number is passed as undefined.
 * 3. The rag-database page for the document, when document_id is set.
 * 4. Otherwise null.
 *
 * document_id and sub_question_text are URI-encoded; chunk_index is interpolated
 * as is.
 *
 * @param source Source to link to.
 * @param highlightReady Whether the highlight view may be used for this source.
 * @returns A URL string, or null when no target can be built.
 */
function buildCitationUrl(source: SourceMetadata, highlightReady?: boolean): string | null {
  if (highlightReady && source.document_id && source.sub_question_text) {
    const v2Base = `${import.meta.env.VITE_API_BASE_URL ??
      '/api/v1'}/v2`
    return `${v2Base}/highlights?document_id=${encodeURIComponent(source.document_id)}&chunk_index=${source.chunk_index}&sub_question=${encodeURIComponent(source.sub_question_text)}`
  }
  if (source.chunk_file_path) {
    return getPdfViewerUrl(
      source.chunk_file_path,
      source.page_number ?? undefined,
      source.filename
    )
  }
  if (source.document_id) {
    return `/rag-database?document=${encodeURIComponent(source.document_id)}`
  }
  return null
}

/**
 * Resolves citation text to a source using progressively looser matching.
 *
 * The text is trimmed and lowercased, then tried in this order:
 * 1. Exact key.
 * 2. If it has the shape of a name, a comma, the word page and digits, the name
 *    part alone. The page number itself is then ignored.
 * 3. The whole citation with a trailing supported extension removed.
 *
 * NOTE(review): step 3 strips only an extension at the very end of the citation,
 * so a citation that has both an extension and a page suffix gains nothing from
 * it. Verify whether that combination needs to resolve against extension-less
 * filenames.
 *
 * @param citationText Raw text found inside a citation.
 * @param lookup Map produced by buildCitationLookup.
 * @returns The matching source, or undefined.
 */
function findSource(
  citationText: string,
  lookup: Map
): SourceMetadata | undefined {
  const trimmed = citationText.trim().toLowerCase()
  let source = lookup.get(trimmed)
  if (!source) {
    // Fall back to the bare name when the exact name-plus-page key is unknown.
    const pageMatch = trimmed.match(/^(.+?),\s*page\s+(\d+)$/i)
    if (pageMatch) {
      source = lookup.get(pageMatch[1].trim().toLowerCase())
    }
  }
  if (!source) {
    const strippedCitation = stripExtension(trimmed)
    if (strippedCitation !== trimmed) {
      source = lookup.get(strippedCitation)
    }
  }
  return source
}

/**
 * Replaces each citation that resolves to a linkable source with a numbered
 * markdown link.
 *
 * What the visible callback body does for every match:
 * - trims the captured content and resolves it with findSource;
 * - marks the source as highlight-ready when highlightKeys contains the key made
 *   of document_id, chunk_index and the URI-encoded sub_question_text joined by
 *   underscores (only checked when document_id and sub_question_text are set);
 * - asks buildCitationUrl for a target; when one exists, increments refCounter
 *   and returns a link whose label is that counter;
 * - otherwise returns the matched text unchanged.
 *
 * NOTE(review): the citation regex is cut off right after its opening
 * paren and question mark, and the text that followed it is missing. That lost
 * span presumably declared refCounter and opened the replace call whose callback
 * receives fullMatch and content, since none of those names are defined in what
 * remains. TODO restore from the original file.
 *
 * @param text Markdown to process.
 * @param lookup Map produced by buildCitationLookup.
 * @param highlightKeys Optional set of highlight keys in the format above.
 * @returns The text with resolvable citations replaced.
 */
function replaceCitationPatterns(
  text: string,
  lookup: Map,
  highlightKeys?: Set
): string {
  const citationPattern = /(?
  {
    const trimmed = content.trim()
    const source = findSource(trimmed, lookup)
    if (source) {
      let isReady = false
      if (highlightKeys && source.document_id && source.sub_question_text) {
        isReady = highlightKeys.has(
          `${source.document_id}_${source.chunk_index}_${encodeURIComponent(source.sub_question_text)}`
        )
      }
      const url = buildCitationUrl(source, isReady)
      if (url) {
        refCounter++
        return `[${refCounter}](${url})`
      }
    }
    return fullMatch
  })
}

/**
 * Rewrites citations in text against a flat list of sources.
 *
 * @param text Markdown to process.
 * @param sources Candidate sources. When empty, text is returned untouched.
 * @param highlightKeys Optional set of highlight keys, forwarded as is.
 * @returns The processed text.
 */
export function processCitations(text: string, sources: SourceMetadata[], highlightKeys?: Set): string {
  if (!sources.length) return text
  const lookup = buildCitationLookup(sources)
  return replaceCitationPatterns(text, lookup, highlightKeys)
}

/**
 * Lists the sources that an answer actually cites.
 *
 * Returns an empty array when the answer is blank or there are no sources.
 * Otherwise every regex match is resolved with findSource, and each resolved
 * source is kept once, in first-seen order. Duplicates are detected by the
 * document_id and chunk_index pair.
 *
 * NOTE(review): the regex literal is cut off here as well, and the declaration
 * of the seen collection is missing along with it. TODO restore from the
 * original file.
 *
 * @param answerText Answer markdown to scan.
 * @param sources Candidate sources.
 * @returns Distinct cited sources.
 */
export function extractCitedSources(answerText: string, sources: SourceMetadata[]): SourceMetadata[] {
  if (!answerText.trim() || !sources.length) return []
  const lookup = buildCitationLookup(sources)
  const citationPattern = /(?()
  const result: SourceMetadata[] = []
  let match: RegExpExecArray | null
  while ((match = citationPattern.exec(answerText)) !== null) {
    const content = match[1].trim()
    const source = findSource(content, lookup)
    if (source) {
      const key = `${source.document_id}_${source.chunk_index}`
      if (!seen.has(key)) {
        seen.add(key)
        result.push(source)
      }
    }
  }
  return result
}

/**
 * Applies a highlighting replacement to markdown while leaving inline code alone.
 *
 * The text is split on backtick-delimited spans with a capturing group, so the
 * odd-indexed pieces are the code spans themselves and are returned unchanged.
 * Even-indexed pieces go through a single replace call, and all pieces are then
 * concatenated with no separator.
 *
 * NOTE(review): the pattern and most of the replacement string of that replace
 * call are missing in this copy; only a reference to the first capture group
 * survives. What gets highlighted, and with which markup, cannot be determined
 * from here. TODO restore from the original file.
 *
 * @param markdown Markdown to process.
 * @returns The markdown with the replacement applied outside inline code.
 */
export function highlightTerms(markdown: string): string {
  const parts = markdown.split(/(`[^`]*`)/)
  return parts
    .map((part, index) => {
      if (index % 2 === 1) return part
      return part.replace(/(?$1')
    })
    .join('')
}