# legco_ai_assistant/backend/app/models/decompose.py
#
# 70 lines
# 2.6 KiB
# Python

from typing import Any
from pydantic import BaseModel, Field, create_model
class SubQuestions(BaseModel):
    """Structured output model for query decomposition — static fallback.
    When ``decompose_format`` is available from PromptService, a dynamic
    model with DB-configured description and max_length is used instead.
    See ``create_subquestions_model()``.
    """

    # NOTE(review): Pydantic exposes the class docstring and the field
    # description in the generated JSON schema, so both texts are kept
    # verbatim here — confirm before rewording either.
    #
    # The list must hold between 1 and 3 sub-questions; the description
    # (Traditional Chinese) instructs the LLM to emit each entry in the fixed
    # "topic label:specific question/request" format.
    questions: list[str] = Field(
        min_length=1,
        max_length=3,
        description="請將問題/任務拆解成 1-3 個簡化子問題,標籤式主題必須清楚、簡潔、具體,一看就明白(建議 3-8 個字),若涉及地點、地區、人物、時間、金額/財政 等關鍵資訊,必須包含在標籤中 。具體提問/要求要精準、完整 並全部轉換成以下固定格式:\n「標籤式主題:具體提問/要求」",
    )
def create_subquestions_model(description: str, max_length: int) -> type[BaseModel]:
    """Build a ``SubQuestions`` model class whose field constraints are supplied at runtime.

    Args:
        description: Text attached to the ``questions`` field as its
            description.
        max_length: Upper bound on the number of sub-questions. It is not
            validated here; the caller is expected to pass a sensible value
            (the original contract documents 1-5).

    Returns:
        A Pydantic ``BaseModel`` subclass named ``SubQuestions`` with a single
        ``questions: list[str]`` field holding at least one item and at most
        ``max_length`` items.
    """
    questions_field = Field(
        description=description,
        min_length=1,
        max_length=max_length,
    )
    # create_model takes each field as a ``name=(type, FieldInfo)`` keyword.
    field_definitions: dict[str, Any] = {
        "questions": (list[str], questions_field),
    }
    return create_model("SubQuestions", **field_definitions)
def parse_decompose_format(raw: str) -> dict[str, Any]:
    """Parse the ``decompose_format`` JSON string into a validated config dict.

    Args:
        raw: JSON text expected to decode to an object with a ``description``
            string and a ``max_length`` integer.

    Returns:
        A dict with ``description`` (str, surrounding whitespace stripped) and
        ``max_length`` (int, between 1 and 5 inclusive).

    Raises:
        ValueError: If the text is not valid JSON, does not decode to a JSON
            object, or either field is missing or malformed.
    """
    import json

    try:
        config = json.loads(raw)
    except json.JSONDecodeError as exc:
        raise ValueError(f"decompose_format is not valid JSON: {exc}") from exc

    if not isinstance(config, dict):
        raise ValueError("decompose_format must be a JSON object")

    description = config.get("description")
    if not isinstance(description, str) or not description.strip():
        raise ValueError("decompose_format.description must be a non-empty string")

    max_length = config.get("max_length")
    # bool is a subclass of int, so JSON ``true``/``false`` would otherwise
    # pass the isinstance check and ``true`` would be accepted as 1.
    if (
        isinstance(max_length, bool)
        or not isinstance(max_length, int)
        or not 1 <= max_length <= 5
    ):
        raise ValueError("decompose_format.max_length must be an integer between 1 and 5")

    return {"description": description.strip(), "max_length": max_length}