feat: inject Pydantic JSON schema into Deepseek prompt (Phase 6)
Follows Deepseek JSON Output guide: the prompt now includes the word 'json' and a format example derived from the Pydantic model schema. Added _pydantic_to_json_instruction() helper that builds a human-readable schema description with EXAMPLE JSON OUTPUT. Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent) Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
parent
226f4ed700
commit
df62283f58
|
|
@ -39,6 +39,53 @@ def _truncate_prompt_for_log(prompt: str, first_chars: int = 100, last_chars: in
|
|||
)
|
||||
|
||||
|
||||
def _pydantic_to_json_instruction(model: Any) -> str:
    """Build a JSON-format instruction from a Pydantic model's schema.

    Follows the Deepseek JSON Output guide: the prompt must contain the word
    "json" and an example of the expected shape. The model schema is
    converted into a human-readable text description with a filled-in example.

    Args:
        model: Object exposing ``model_json_schema()`` that returns a JSON
            schema dict (e.g. a Pydantic v2 model class).

    Returns:
        A multi-line string made of an introductory sentence, one bullet per
        top-level property, a blank line, the marker ``EXAMPLE JSON OUTPUT:``
        and a pretty-printed JSON example object.
    """
    schema = model.model_json_schema()
    props = schema.get("properties", {})
    # Resolve the fallback name lazily so objects without ``__name__`` still work.
    title = schema.get("title") or getattr(model, "__name__", type(model).__name__)

    parts: list[str] = [f"Output the result in JSON format as a {title} object."]

    # Build an example by filling each field with a representative value.
    example: dict[str, Any] = {}
    for name, raw_info in props.items():
        info = _unwrap_nullable_schema(raw_info)
        field_type = info.get("type", "any")
        desc = info.get("description", "")
        # Avoid a dangling " — " when the field carries no description.
        suffix = f" — {desc}" if desc else ""

        if field_type == "array":
            items = info.get("items", {})
            item_type = items.get("type", "string")
            # When the schema declares no minItems the instruction asks for at least one.
            min_items = info.get("minItems", 1)
            max_part = f", max {info['maxItems']}" if info.get("maxItems") else ""
            parts.append(
                f'- "{name}": array of {item_type} (min {min_items}{max_part}){suffix}'
            )
            example[name] = [f"<{item_type}_1>", f"<{item_type}_2>"]
            continue

        parts.append(f'- "{name}": {field_type}{suffix}')
        if field_type == "string":
            # Use the (truncated) description as the placeholder; never emit "<>".
            example[name] = f"<{desc[:40]}>" if desc else "<string>"
        elif field_type in ("integer", "number"):
            example[name] = 0
        elif field_type == "boolean":
            # A real JSON boolean keeps the example type-correct.
            example[name] = True
        else:
            example[name] = f"<{field_type}>"

    parts.append("")
    parts.append("EXAMPLE JSON OUTPUT:")
    parts.append(json.dumps(example, indent=2, ensure_ascii=False))

    return "\n".join(parts)


def _unwrap_nullable_schema(info: dict[str, Any]) -> dict[str, Any]:
    """Return a property schema with its first non-null ``anyOf`` variant merged in.

    Property schemas that already have a ``type`` are returned unchanged. For
    a schema that only lists alternatives under ``anyOf`` (the form used for
    optional fields), the first variant whose type is not ``"null"`` is merged
    with the outer keys (description, constraints, ...), which take precedence.
    """
    if "type" in info:
        return info
    for variant in info.get("anyOf", ()):
        if isinstance(variant, dict) and variant.get("type") not in (None, "null"):
            outer = {key: value for key, value in info.items() if key != "anyOf"}
            return {**variant, **outer}
    return info
|
||||
|
||||
|
||||
class LLMClientDP:
|
||||
"""Async Deepseek API client for query decomposition.
|
||||
|
||||
|
|
@ -126,17 +173,21 @@ class LLMClientDP:
|
|||
|
||||
Deepseek supports ``response_format={"type": "json_object"}`` (which
|
||||
guarantees valid JSON) but not OpenAI's ``json_schema`` mode (which
|
||||
would validate against a specific schema). We use the JSON mode to
|
||||
get a guaranteed-valid JSON response, then validate it client-side
|
||||
against *pydantic_model*.
|
||||
would validate against a specific schema). We inject a JSON format
|
||||
instruction derived from *pydantic_model* into the prompt (per the
|
||||
Deepseek JSON Output guide), then validate client-side.
|
||||
"""
|
||||
prompt_preview = _truncate_prompt_for_log(prompt, first_chars=300, last_chars=100)
|
||||
logger.info("[%s] Deepseek structured request started. Prompt: %s", step_name, prompt_preview)
|
||||
start_time = time.perf_counter()
|
||||
|
||||
# Inject JSON format instruction from the Pydantic model.
|
||||
json_instruction = _pydantic_to_json_instruction(pydantic_model)
|
||||
full_prompt = f"{prompt}\n\n{json_instruction}"
|
||||
|
||||
try:
|
||||
response = await self.complete(
|
||||
prompt=prompt,
|
||||
prompt=full_prompt,
|
||||
temperature=0.0,
|
||||
step_name=step_name,
|
||||
response_format={"type": "json_object"},
|
||||
|
|
|
|||
|
|
@ -191,11 +191,16 @@ class TestLLMClientDPCompleteStructured:
|
|||
assert result == expected
|
||||
assert result.questions == ["Q1", "Q2", "Q3"]
|
||||
mock_complete.assert_called_once()
|
||||
# Verify Deepseek JSON mode is used
|
||||
call_kwargs = mock_complete.call_args.kwargs
|
||||
assert call_kwargs["response_format"] == {"type": "json_object"}
|
||||
assert call_kwargs["temperature"] == 0.0
|
||||
|
||||
# The prompt must contain the JSON format instruction per Deepseek docs.
|
||||
full_prompt = call_kwargs["prompt"]
|
||||
assert "json" in full_prompt.lower()
|
||||
assert "EXAMPLE JSON OUTPUT" in full_prompt
|
||||
assert '"questions"' in full_prompt
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_complete_structured_with_markdown_fence(self, client):
|
||||
"""Should strip markdown code fences before JSON parsing."""
|
||||
|
|
|
|||
Loading…
Reference in New Issue