feat: Sub-Phase 8.0 — config & enums for Q&A-pair chunking strategy
Backend:
- Add 6 Q&A chunking config fields to Settings (default_chunking_strategy,
qa_vision_enabled, qa_max_chunk_tokens, qa_structure_model,
qa_include_internal_refs, qa_cache_vision_results)
- Define ChunkingStrategyType Literal + VALID_CHUNKING_STRATEGIES frozenset
- Add strategy field to IngestResponse (default token, non-breaking)
- Add IngestRequest model with strategy param
- Update .env.example with new env vars
Frontend:
- Add ChunkingStrategy type ('token' | 'question')
- Extend IngestResponse, DocumentInfo, ChunkInfo with Q&A fields
Tests:
- test_qa_chunking_config_defaults — all defaults verified
- test_qa_chunking_config_from_env — env var overrides verified
Plan fix: renamed qa_verification_model → qa_structure_model to match
LLM-first architecture
This commit is contained in:
parent
6bf04cedb1
commit
ef10b937cf
|
|
@ -327,7 +327,7 @@ For each section in the JSON response:
|
|||
If `qa_vision_enabled=false`, or when optimizing for cost, use regex as a fast pre-filter. The LLM call is skipped for documents where regex confidently identifies all boundaries, and is used only when:
|
||||
- No regex pattern matches (unknown format)
|
||||
- Regex produces < 2 sections (likely misdetection)
|
||||
- `qa_verification_model` is not set to `"none"`
|
||||
- `qa_structure_model` is not set to `"none"`
|
||||
|
||||
### Algorithm Detail: Table-to-Markdown
|
||||
|
||||
|
|
@ -382,7 +382,7 @@ class Settings(BaseSettings):
|
|||
# NEW: Q&A chunking config
|
||||
qa_vision_enabled: bool = True # Toggle vision-based table extraction (uses existing LLM_MODEL_NAME)
|
||||
qa_max_chunk_tokens: int = 3000 # Max tokens per Q&A chunk (before forced split)
|
||||
qa_verification_model: str = "" # LLM for boundary verification (empty = use LLM_MODEL_NAME)
|
||||
qa_structure_model: str = "" # LLM for structure detection (empty = use LLM_MODEL_NAME)
|
||||
qa_include_internal_refs: bool = True # Include [內部參考] in chunks
|
||||
qa_cache_vision_results: bool = True # Cache vision results per page
|
||||
|
||||
|
|
@ -390,7 +390,7 @@ class Settings(BaseSettings):
|
|||
# DEFAULT_CHUNKING_STRATEGY=token
|
||||
# QA_VISION_ENABLED=true
|
||||
# QA_MAX_CHUNK_TOKENS=3000
|
||||
# QA_VERIFICATION_MODEL=
|
||||
# QA_STRUCTURE_MODEL=
|
||||
# QA_INCLUDE_INTERNAL_REFS=true
|
||||
# QA_CACHE_VISION_RESULTS=true
|
||||
|
||||
|
|
|
|||
|
|
@ -41,3 +41,11 @@ MAX_VIDEO_SIZE_MB=300
|
|||
# Set to false to disable System Audio or Listen Mic capture
|
||||
SYSTEM_AUDIO_ENABLED=true
|
||||
MIC_ENABLED=true
|
||||
|
||||
# Q&A-pair chunking (Package 8)
|
||||
DEFAULT_CHUNKING_STRATEGY=token
|
||||
QA_VISION_ENABLED=true
|
||||
QA_MAX_CHUNK_TOKENS=3000
|
||||
QA_STRUCTURE_MODEL=
|
||||
QA_INCLUDE_INTERNAL_REFS=true
|
||||
QA_CACHE_VISION_RESULTS=true
|
||||
|
|
|
|||
|
|
@ -44,6 +44,14 @@ class Settings(BaseSettings):
|
|||
relevance_threshold: float = 7.0
|
||||
llm_timeout: float = 60.0
|
||||
|
||||
# Q&A-pair chunking strategy (Package 8)
|
||||
default_chunking_strategy: str = "token"
|
||||
qa_vision_enabled: bool = True
|
||||
qa_max_chunk_tokens: int = 3000
|
||||
qa_structure_model: str = ""
|
||||
qa_include_internal_refs: bool = True
|
||||
qa_cache_vision_results: bool = True
|
||||
|
||||
# Alibaba Cloud DashScope ASR (Phase 2)
|
||||
dashscope_api_key: str = ""
|
||||
asr_model_name: str = "qwen3-asr-flash"
|
||||
|
|
|
|||
|
|
@ -1,7 +1,18 @@
|
|||
from typing import Literal, get_args

from pydantic import BaseModel
|
||||
|
||||
# Closed set of chunking strategy names accepted by the ingest API.
ChunkingStrategyType = Literal["token", "question"]

# Runtime counterpart of ChunkingStrategyType for membership checks.
# Derived from the Literal so the two definitions cannot drift apart when a
# new strategy is added.
VALID_CHUNKING_STRATEGIES = frozenset(get_args(ChunkingStrategyType))
|
||||
|
||||
|
||||
# Request model carrying the chunking strategy for an ingest call.
class IngestRequest(BaseModel):
    # Defaults to "token" when the caller does not specify a strategy.
    strategy: ChunkingStrategyType = "token"
|
||||
|
||||
|
||||
# Response model returned for an ingested document.
class IngestResponse(BaseModel):
    document_id: str
    chunk_count: int
    filename: str
    # Has a default ("token"), so constructing the model without it still works.
    strategy: ChunkingStrategyType = "token"
|
||||
|
|
|
|||
|
|
@ -31,3 +31,47 @@ def test_config_default_values(monkeypatch):
|
|||
settings = Settings()
|
||||
assert settings.llm_base_url == "https://openrouter.ai/api/v1"
|
||||
assert settings.llm_model_name == "qwen/qwen3.5-35b-a3b"
|
||||
|
||||
|
||||
def test_qa_chunking_config_defaults(monkeypatch):
    """Phase 8.0: Q&A chunking config fields have correct defaults."""
    # Remove every Q&A chunking variable from the environment so that only
    # the defaults declared on Settings are observed.
    qa_env_names = (
        "DEFAULT_CHUNKING_STRATEGY",
        "QA_VISION_ENABLED",
        "QA_MAX_CHUNK_TOKENS",
        "QA_STRUCTURE_MODEL",
        "QA_INCLUDE_INTERNAL_REFS",
        "QA_CACHE_VISION_RESULTS",
    )
    for env_name in qa_env_names:
        monkeypatch.delenv(env_name, raising=False)

    from app.core.config import Settings

    cfg = Settings()
    assert cfg.default_chunking_strategy == "token"
    assert cfg.qa_vision_enabled is True
    assert cfg.qa_max_chunk_tokens == 3000
    assert cfg.qa_structure_model == ""
    assert cfg.qa_include_internal_refs is True
    assert cfg.qa_cache_vision_results is True
|
||||
|
||||
|
||||
def test_qa_chunking_config_from_env(tmp_path, monkeypatch):
    """Phase 8.0: Q&A chunking config fields load from .env."""
    # Clear any values inherited from the real process environment first.
    # In pydantic-settings, environment variables take precedence over
    # dotenv-file values, so a stray exported variable (developer shell, CI)
    # would otherwise shadow the .env written below and fail the test.
    for env_name in (
        "DEFAULT_CHUNKING_STRATEGY",
        "QA_VISION_ENABLED",
        "QA_MAX_CHUNK_TOKENS",
        "QA_STRUCTURE_MODEL",
        "QA_INCLUDE_INTERNAL_REFS",
        "QA_CACHE_VISION_RESULTS",
    ):
        monkeypatch.delenv(env_name, raising=False)

    env_file = tmp_path / ".env"
    env_file.write_text(
        "DEFAULT_CHUNKING_STRATEGY=question\n"
        "QA_VISION_ENABLED=false\n"
        "QA_MAX_CHUNK_TOKENS=5000\n"
        "QA_STRUCTURE_MODEL=anthropic/claude-3-haiku\n"
        "QA_INCLUDE_INTERNAL_REFS=false\n"
        "QA_CACHE_VISION_RESULTS=false\n"
    )

    # Run from the temp directory so the .env written above is the one in the
    # current working directory when Settings is instantiated.
    monkeypatch.chdir(tmp_path)
    from app.core.config import Settings

    settings = Settings()
    assert settings.default_chunking_strategy == "question"
    assert settings.qa_vision_enabled is False
    assert settings.qa_max_chunk_tokens == 5000
    assert settings.qa_structure_model == "anthropic/claude-3-haiku"
    assert settings.qa_include_internal_refs is False
    assert settings.qa_cache_vision_results is False
|
||||
|
|
|
|||
|
|
@ -1,3 +1,5 @@
|
|||
/** Name of a document chunking strategy: either 'token' or 'question'. */
export type ChunkingStrategy = 'token' | 'question'
|
||||
|
||||
export interface SourceMetadata {
|
||||
filename: string
|
||||
upload_date: string
|
||||
|
|
@ -40,6 +42,7 @@ export interface IngestResponse {
|
|||
document_id: string
|
||||
chunk_count: number
|
||||
filename: string
|
||||
strategy: ChunkingStrategy
|
||||
}
|
||||
|
||||
export interface DocumentInfo {
|
||||
|
|
@ -47,6 +50,7 @@ export interface DocumentInfo {
|
|||
filename: string
|
||||
chunk_count: number
|
||||
upload_date: string
|
||||
chunking_strategy: ChunkingStrategy
|
||||
}
|
||||
|
||||
export interface ChunkInfo {
|
||||
|
|
@ -55,6 +59,13 @@ export interface ChunkInfo {
|
|||
content_summary: string
|
||||
page_number: number | null
|
||||
chunk_file_path: string | null
|
||||
strategy_type: ChunkingStrategy
|
||||
question_index: number | null
|
||||
question_id: string | null
|
||||
question_text: string | null
|
||||
topic_section: string | null
|
||||
source_page_range: number[] | null
|
||||
has_table: boolean | null
|
||||
}
|
||||
|
||||
export interface DocumentListResponse {
|
||||
|
|
|
|||
Loading…
Reference in New Issue