feat: Sub-Phase 8.0 — config & enums for Q&A-pair chunking strategy

Backend: - Add 6 Q&A chunking config fields to Settings (default_chunking_strategy, qa_vision_enabled, qa_max_chunk_tokens, qa_structure_model, qa_include_internal_refs, qa_cache_vision_results) - Define ChunkingStrategyType Literal + VALID_CHUNKING_STRATEGIES frozenset - Add strategy field to IngestResponse (default token, non-breaking) - Add IngestRequest model with strategy param - Update .env.example with new env vars Frontend: - Add ChunkingStrategy type ('token' | 'question') - Extend IngestResponse, DocumentInfo, ChunkInfo with Q&A fields Tests: - test_qa_chunking_config_defaults — all defaults verified - test_qa_chunking_config_from_env — env var overrides verified Plan fix: renamed qa_verification_model → qa_structure_model to match LLM-first architecture
2026-05-15 12:01:28 +08:00 · 2026-05-15 12:01:28 +08:00 · ef10b937cf
parent 6bf04cedb1
commit ef10b937cf
6 changed files with 85 additions and 3 deletions
--- a/.plans/package8_enhancement_plan.md
+++ b/.plans/package8_enhancement_plan.md
@ -327,7 +327,7 @@ For each section in the JSON response:
 If `qa_vision_enabled=false` or for cost optimization, use regex as a fast pre-filter. The LLM call is skipped for documents where regex confidently identifies all boundaries, and used only when:
 - No regex pattern matches (unknown format)
 - Regex produces < 2 sections (likely misdetection)
- `qa_verification_model` is not set to `"none"`
+- `qa_structure_model` is not set to `"none"`
 ### Algorithm Detail: Table-to-Markdown
@ -382,7 +382,7 @@ class Settings(BaseSettings):
    # NEW: Q&A chunking config
    qa_vision_enabled: bool = True   # Toggle vision-based table extraction (uses existing LLM_MODEL_NAME)
    qa_max_chunk_tokens: int = 3000  # Max tokens per Q&A chunk (before forced split)
-    qa_verification_model: str = ""  # LLM for boundary verification (empty = use LLM_MODEL_NAME)
+    qa_structure_model: str = ""  # LLM for structure detection (empty = use LLM_MODEL_NAME)
    qa_include_internal_refs: bool = True  # Include [內部參考] in chunks
    qa_cache_vision_results: bool = True   # Cache vision results per page
@ -390,7 +390,7 @@ class Settings(BaseSettings):
    # DEFAULT_CHUNKING_STRATEGY=token
    # QA_VISION_ENABLED=true
    # QA_MAX_CHUNK_TOKENS=3000
-    # QA_VERIFICATION_MODEL=
+    # QA_STRUCTURE_MODEL=
    # QA_INCLUDE_INTERNAL_REFS=true
    # QA_CACHE_VISION_RESULTS=true
--- a/backend/.env.example
+++ b/backend/.env.example
@ -41,3 +41,11 @@ MAX_VIDEO_SIZE_MB=300
 # Set to false to disable System Audio or Listen Mic capture
 SYSTEM_AUDIO_ENABLED=true
 MIC_ENABLED=true
 # Q&A-pair chunking (Package 8)
 DEFAULT_CHUNKING_STRATEGY=token
 QA_VISION_ENABLED=true
 QA_MAX_CHUNK_TOKENS=3000
 QA_STRUCTURE_MODEL=
 QA_INCLUDE_INTERNAL_REFS=true
 QA_CACHE_VISION_RESULTS=true
--- a/backend/app/core/config.py
+++ b/backend/app/core/config.py
@ -44,6 +44,14 @@ class Settings(BaseSettings):
    relevance_threshold: float = 7.0
    llm_timeout: float = 60.0
    # Q&A-pair chunking strategy (Package 8)
    # Presumably the strategy applied when a request does not choose one.
    # NOTE(review): typed as a plain str, so a value outside "token" /
    # "question" (the strategies the ingest models accept) is not rejected
    # here — confirm it is validated where it is consumed.
    default_chunking_strategy: str = "token"
    # Toggle for vision-based table extraction (per the Package 8 plan).
    qa_vision_enabled: bool = True
    # Max tokens per Q&A chunk before a forced split (per the Package 8 plan).
    qa_max_chunk_tokens: int = 3000
    # LLM used for structure detection; per the plan, empty means "use
    # LLM_MODEL_NAME".
    qa_structure_model: str = ""
    # Whether internal-reference sections are included in chunks (per the plan).
    qa_include_internal_refs: bool = True
    # Whether vision results are cached per page (per the plan).
    qa_cache_vision_results: bool = True
    # Alibaba Cloud DashScope ASR (Phase 2)
    dashscope_api_key: str = ""
    asr_model_name: str = "qwen3-asr-flash"
--- a/backend/app/models/ingest.py
+++ b/backend/app/models/ingest.py
@ -1,7 +1,18 @@
 from typing import Literal
 from pydantic import BaseModel
 # Static type for the chunking strategies the ingest models accept.
 ChunkingStrategyType = Literal["token", "question"]
 # The same values as a runtime set. This literal duplicates the one above,
 # so the two must be kept in sync by hand when a strategy is added.
 VALID_CHUNKING_STRATEGIES = frozenset({"token", "question"})
 # Request-side model carrying the chunking strategy for an ingest call.
 class IngestRequest(BaseModel):
    # Falls back to "token" when the caller does not specify a strategy.
    strategy: ChunkingStrategyType = "token"
 # Response-side model describing the result of an ingest call.
 class IngestResponse(BaseModel):
    document_id: str
    chunk_count: int
    filename: str
    # Has a default ("token") so code that builds a response without passing
    # a strategy keeps working.
    strategy: ChunkingStrategyType = "token"
--- a/backend/app/test/test_phase1_config.py
+++ b/backend/app/test/test_phase1_config.py
@ -31,3 +31,47 @@ def test_config_default_values(monkeypatch):
    settings = Settings()
    assert settings.llm_base_url == "https://openrouter.ai/api/v1"
    assert settings.llm_model_name == "qwen/qwen3.5-35b-a3b"
 def test_qa_chunking_config_defaults(tmp_path, monkeypatch):
    """Phase 8.0: Q&A chunking config fields have correct defaults.

    Both override sources are neutralised so that only the defaults declared
    on ``Settings`` are observed: the process environment and any ``.env``
    file in the working directory.
    """
    # The from-env test in this module shows that Settings reads ".env" from
    # the current working directory. Run from an empty directory so a
    # developer's real .env cannot leak into the defaults being asserted.
    monkeypatch.chdir(tmp_path)
    for var_name in (
        "DEFAULT_CHUNKING_STRATEGY",
        "QA_VISION_ENABLED",
        "QA_MAX_CHUNK_TOKENS",
        "QA_STRUCTURE_MODEL",
        "QA_INCLUDE_INTERNAL_REFS",
        "QA_CACHE_VISION_RESULTS",
    ):
        monkeypatch.delenv(var_name, raising=False)
    from app.core.config import Settings
    settings = Settings()
    assert settings.default_chunking_strategy == "token"
    assert settings.qa_vision_enabled is True
    assert settings.qa_max_chunk_tokens == 3000
    assert settings.qa_structure_model == ""
    assert settings.qa_include_internal_refs is True
    assert settings.qa_cache_vision_results is True
 def test_qa_chunking_config_from_env(tmp_path, monkeypatch):
    """Phase 8.0: Q&A chunking config fields load from .env.

    A throwaway ``.env`` is written into ``tmp_path`` and the test runs from
    that directory, so the values asserted below can only come from the file.
    """
    env_file = tmp_path / ".env"
    env_file.write_text(
        "DEFAULT_CHUNKING_STRATEGY=question\n"
        "QA_VISION_ENABLED=false\n"
        "QA_MAX_CHUNK_TOKENS=5000\n"
        "QA_STRUCTURE_MODEL=anthropic/claude-3-haiku\n"
        "QA_INCLUDE_INTERNAL_REFS=false\n"
        "QA_CACHE_VISION_RESULTS=false\n"
    )
    # Real environment variables take priority over dotenv values in
    # pydantic-settings, so drop any that are exported in the shell or CI;
    # otherwise they would mask the file under test.
    for var_name in (
        "DEFAULT_CHUNKING_STRATEGY",
        "QA_VISION_ENABLED",
        "QA_MAX_CHUNK_TOKENS",
        "QA_STRUCTURE_MODEL",
        "QA_INCLUDE_INTERNAL_REFS",
        "QA_CACHE_VISION_RESULTS",
    ):
        monkeypatch.delenv(var_name, raising=False)
    monkeypatch.chdir(tmp_path)
    from app.core.config import Settings
    settings = Settings()
    assert settings.default_chunking_strategy == "question"
    assert settings.qa_vision_enabled is False
    assert settings.qa_max_chunk_tokens == 5000
    assert settings.qa_structure_model == "anthropic/claude-3-haiku"
    assert settings.qa_include_internal_refs is False
    assert settings.qa_cache_vision_results is False
--- a/frontend/src/types/index.ts
+++ b/frontend/src/types/index.ts
@ -1,3 +1,5 @@
 export type ChunkingStrategy = 'token' | 'question'
 export interface SourceMetadata {
  filename: string
  upload_date: string
@ -40,6 +42,7 @@ export interface IngestResponse {
  document_id: string
  chunk_count: number
  filename: string
  strategy: ChunkingStrategy
 }
 export interface DocumentInfo {
@ -47,6 +50,7 @@ export interface DocumentInfo {
  filename: string
  chunk_count: number
  upload_date: string
  chunking_strategy: ChunkingStrategy
 }
 export interface ChunkInfo {
@ -55,6 +59,13 @@ export interface ChunkInfo {
  content_summary: string
  page_number: number | null
  chunk_file_path: string | null
  strategy_type: ChunkingStrategy
  question_index: number | null
  question_id: string | null
  question_text: string | null
  topic_section: string | null
  source_page_range: number[] | null
  has_table: boolean | null
 }
 export interface DocumentListResponse {