feat: Sub-Phase 8.0 — config & enums for Q&A-pair chunking strategy

Backend:
- Add 6 Q&A chunking config fields to Settings (default_chunking_strategy,
  qa_vision_enabled, qa_max_chunk_tokens, qa_structure_model,
  qa_include_internal_refs, qa_cache_vision_results)
- Define ChunkingStrategyType Literal + VALID_CHUNKING_STRATEGIES frozenset
- Add strategy field to IngestResponse (default token, non-breaking)
- Add IngestRequest model with strategy param
- Update .env.example with new env vars

Frontend:
- Add ChunkingStrategy type ('token' | 'question')
- Extend IngestResponse, DocumentInfo, ChunkInfo with Q&A fields

Tests:
- test_qa_chunking_config_defaults — all defaults verified
- test_qa_chunking_config_from_env — env var overrides verified

Plan fix: renamed qa_verification_model → qa_structure_model to match
LLM-first architecture
This commit is contained in:
Woody 2026-05-15 12:01:28 +08:00
parent 6bf04cedb1
commit ef10b937cf
6 changed files with 85 additions and 3 deletions

View File

@ -327,7 +327,7 @@ For each section in the JSON response:
When `qa_vision_enabled=false`, or when optimizing for cost, use regex as a fast pre-filter. The LLM call is skipped for documents where regex confidently identifies all boundaries, and is used only when:
- No regex pattern matches (unknown format)
- Regex produces < 2 sections (likely misdetection)
- `qa_verification_model` is not set to `"none"`
- `qa_structure_model` is not set to `"none"`
### Algorithm Detail: Table-to-Markdown
@ -382,7 +382,7 @@ class Settings(BaseSettings):
# NEW: Q&A chunking config
qa_vision_enabled: bool = True # Toggle vision-based table extraction (uses existing LLM_MODEL_NAME)
qa_max_chunk_tokens: int = 3000 # Max tokens per Q&A chunk (before forced split)
qa_verification_model: str = "" # LLM for boundary verification (empty = use LLM_MODEL_NAME)
qa_structure_model: str = "" # LLM for structure detection (empty = use LLM_MODEL_NAME)
qa_include_internal_refs: bool = True # Include [內部參考] in chunks
qa_cache_vision_results: bool = True # Cache vision results per page
@ -390,7 +390,7 @@ class Settings(BaseSettings):
# DEFAULT_CHUNKING_STRATEGY=token
# QA_VISION_ENABLED=true
# QA_MAX_CHUNK_TOKENS=3000
# QA_VERIFICATION_MODEL=
# QA_STRUCTURE_MODEL=
# QA_INCLUDE_INTERNAL_REFS=true
# QA_CACHE_VISION_RESULTS=true

View File

@ -41,3 +41,11 @@ MAX_VIDEO_SIZE_MB=300
# Set to false to disable System Audio or Listen Mic capture
SYSTEM_AUDIO_ENABLED=true
MIC_ENABLED=true
# Q&A-pair chunking (Package 8)
DEFAULT_CHUNKING_STRATEGY=token
QA_VISION_ENABLED=true
QA_MAX_CHUNK_TOKENS=3000
QA_STRUCTURE_MODEL=
QA_INCLUDE_INTERNAL_REFS=true
QA_CACHE_VISION_RESULTS=true

View File

@ -44,6 +44,14 @@ class Settings(BaseSettings):
relevance_threshold: float = 7.0
llm_timeout: float = 60.0
# Q&A-pair chunking strategy (Package 8)
default_chunking_strategy: str = "token"
qa_vision_enabled: bool = True
qa_max_chunk_tokens: int = 3000
qa_structure_model: str = ""
qa_include_internal_refs: bool = True
qa_cache_vision_results: bool = True
# Alibaba Cloud DashScope ASR (Phase 2)
dashscope_api_key: str = ""
asr_model_name: str = "qwen3-asr-flash"

View File

@ -1,7 +1,18 @@
from typing import Literal, get_args

from pydantic import BaseModel
# Closed set of chunking strategy names used by the ingest request/response models.
ChunkingStrategyType = Literal["token", "question"]

# Runtime set of the same names, derived from the Literal so the two
# definitions cannot drift apart when a strategy is added or renamed.
VALID_CHUNKING_STRATEGIES = frozenset(get_args(ChunkingStrategyType))
class IngestRequest(BaseModel):
    """Request model carrying the chunking strategy for an ingest call."""

    # One of the ChunkingStrategyType values; "token" is used when omitted.
    strategy: ChunkingStrategyType = "token"
class IngestResponse(BaseModel):
    """Response model returned for an ingested document."""

    # Required fields; presumably the stored document's identifier, the number
    # of chunks produced, and the uploaded file's name — confirm against the
    # ingest endpoint.
    document_id: str
    chunk_count: int
    filename: str
    # Defaults to "token", so code that constructs a response without a
    # strategy keeps working.
    strategy: ChunkingStrategyType = "token"

View File

@ -31,3 +31,47 @@ def test_config_default_values(monkeypatch):
settings = Settings()
assert settings.llm_base_url == "https://openrouter.ai/api/v1"
assert settings.llm_model_name == "qwen/qwen3.5-35b-a3b"
def test_qa_chunking_config_defaults(tmp_path, monkeypatch):
    """Phase 8.0: Q&A chunking config fields have correct defaults.

    The test isolates itself from both configuration sources that could
    override a default: process environment variables and a ``.env`` file in
    the working directory.
    """
    qa_env_vars = (
        "DEFAULT_CHUNKING_STRATEGY",
        "QA_VISION_ENABLED",
        "QA_MAX_CHUNK_TOKENS",
        "QA_STRUCTURE_MODEL",
        "QA_INCLUDE_INTERNAL_REFS",
        "QA_CACHE_VISION_RESULTS",
    )
    for name in qa_env_vars:
        monkeypatch.delenv(name, raising=False)

    # Settings appears to read `.env` relative to the current working
    # directory (test_qa_chunking_config_from_env relies on that), so run from
    # an empty directory to keep a developer's local `.env` from supplying
    # non-default values.
    monkeypatch.chdir(tmp_path)

    from app.core.config import Settings

    settings = Settings()

    assert settings.default_chunking_strategy == "token"
    assert settings.qa_vision_enabled is True
    assert settings.qa_max_chunk_tokens == 3000
    assert settings.qa_structure_model == ""
    assert settings.qa_include_internal_refs is True
    assert settings.qa_cache_vision_results is True
def test_qa_chunking_config_from_env(tmp_path, monkeypatch):
    """Phase 8.0: Q&A chunking config fields load from .env.

    Writes a ``.env`` file into a temporary directory, switches into it, and
    checks that every Q&A chunking field picks up the overridden value.
    """
    # Environment variables take priority over dotenv values in
    # pydantic-settings, so clear any ambient ones; otherwise a value exported
    # in the shell or CI would shadow the file under test.
    for name in (
        "DEFAULT_CHUNKING_STRATEGY",
        "QA_VISION_ENABLED",
        "QA_MAX_CHUNK_TOKENS",
        "QA_STRUCTURE_MODEL",
        "QA_INCLUDE_INTERNAL_REFS",
        "QA_CACHE_VISION_RESULTS",
    ):
        monkeypatch.delenv(name, raising=False)

    env_file = tmp_path / ".env"
    env_file.write_text(
        "DEFAULT_CHUNKING_STRATEGY=question\n"
        "QA_VISION_ENABLED=false\n"
        "QA_MAX_CHUNK_TOKENS=5000\n"
        "QA_STRUCTURE_MODEL=anthropic/claude-3-haiku\n"
        "QA_INCLUDE_INTERNAL_REFS=false\n"
        "QA_CACHE_VISION_RESULTS=false\n",
        encoding="utf-8",
    )
    # The `.env` file is picked up relative to the working directory.
    monkeypatch.chdir(tmp_path)

    from app.core.config import Settings

    settings = Settings()

    assert settings.default_chunking_strategy == "question"
    assert settings.qa_vision_enabled is False
    assert settings.qa_max_chunk_tokens == 5000
    assert settings.qa_structure_model == "anthropic/claude-3-haiku"
    assert settings.qa_include_internal_refs is False
    assert settings.qa_cache_vision_results is False

View File

@ -1,3 +1,5 @@
export type ChunkingStrategy = 'token' | 'question'
export interface SourceMetadata {
filename: string
upload_date: string
@ -40,6 +42,7 @@ export interface IngestResponse {
document_id: string
chunk_count: number
filename: string
strategy: ChunkingStrategy
}
export interface DocumentInfo {
@ -47,6 +50,7 @@ export interface DocumentInfo {
filename: string
chunk_count: number
upload_date: string
chunking_strategy: ChunkingStrategy
}
export interface ChunkInfo {
@ -55,6 +59,13 @@ export interface ChunkInfo {
content_summary: string
page_number: number | null
chunk_file_path: string | null
strategy_type: ChunkingStrategy
question_index: number | null
question_id: string | null
question_text: string | null
topic_section: string | null
source_page_range: number[] | null
has_table: boolean | null
}
export interface DocumentListResponse {