From e78b670baa6c1499b4df4a61d980943c2f9d2516 Mon Sep 17 00:00:00 2001 From: Woody Date: Fri, 24 Apr 2026 17:52:54 +0800 Subject: [PATCH] feat(backend): use [filename, page N] citation labels in RAG context (sub-phase 2.6) Replace numeric [1] labels with [filename, page N] format in context chunks. Update LLM prompt to instruct inline citation using bracket labels. Enables traceable source references in generated answers. Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent) Co-authored-by: Sisyphus --- backend/app/services/rag.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/backend/app/services/rag.py b/backend/app/services/rag.py index bc3a017..b711ff4 100644 --- a/backend/app/services/rag.py +++ b/backend/app/services/rag.py @@ -95,8 +95,10 @@ class RAGService: for i, (chunk, meta) in enumerate(zip(chunks, metadata_list)): source = meta.get("filename", "unknown") summary = meta.get("content_summary", "") + page_num = meta.get("page_number") + citation_label = f"{source}, page {page_num}" if page_num else source context_parts.append( - f"[{i + 1}] Source: {source}\n" + f"[{citation_label}] Source: {source}\n" f"Summary: {summary}\n" f"Content: {chunk}\n" ) @@ -108,7 +110,8 @@ class RAGService: f"Answer the question using ONLY these document chunks. " f"Do not use any external knowledge. " f"Format your answer as bullet points. " - f"Cite the source name in [ ] for each point.\n\n" + f"Cite your sources inline using the exact bracket labels provided, " + f"e.g. [filename, page N]. Place the citation at the end of each relevant point.\n\n" f"Document chunks:\n{context}\n\n" f"Answer:" )