feat: inject Pydantic JSON schema into Deepseek prompt (Phase 6)

Follows the Deepseek JSON Output guide: the prompt now includes the word 'json' and a format example derived from the Pydantic model schema. Adds a _pydantic_to_json_instruction() helper that builds a human-readable schema description followed by an EXAMPLE JSON OUTPUT block.

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
Woody 2026-05-04 15:17:24 +08:00
parent 226f4ed700
commit df62283f58
2 changed files with 61 additions and 5 deletions

View File

@ -39,6 +39,53 @@ def _truncate_prompt_for_log(prompt: str, first_chars: int = 100, last_chars: in
) )
def _pydantic_to_json_instruction(model: Any) -> str:
    """Build a JSON-format instruction from a Pydantic model's schema.

    Follows the Deepseek JSON Output guide: the prompt must contain the word
    "json" and an example of the expected shape.  The model schema is
    converted into a human-readable text description with a filled-in example.

    Args:
        model: A class exposing ``model_json_schema()`` (and ``__name__``,
            used as the object title when the schema has no ``title``).

    Returns:
        A multi-line string: a header sentence, one ``- "field": ...`` bullet
        per top-level property, a blank line, and an ``EXAMPLE JSON OUTPUT:``
        section containing a pretty-printed placeholder object.
    """
    schema = model.model_json_schema()
    props = schema.get("properties", {})
    title = schema.get("title", model.__name__)

    parts: list[str] = [f"Output the result in JSON format as a {title} object."]

    # Build an example by filling each field with a representative value.
    example: dict[str, Any] = {}
    for name, info in props.items():
        t = info.get("type", "any")
        desc = info.get("description", "")
        # Separate the type from its description, and emit nothing at all
        # when the field has no description (avoids a dangling dash).
        suffix = f" — {desc}" if desc else ""

        if t == "array":
            item_type = info.get("items", {}).get("type", "string")
            bounds = f"min {info.get('minItems', 1)}"
            # Presence check rather than truthiness, so a declared bound is
            # never silently dropped.
            if info.get("maxItems") is not None:
                bounds += f", max {info['maxItems']}"
            parts.append(f'- "{name}": array of {item_type} ({bounds}){suffix}')
            example[name] = [f"<{item_type}_1>", f"<{item_type}_2>"]
            continue

        parts.append(f'- "{name}": {t}{suffix}')
        if t == "string":
            # Fall back to the field name so the placeholder is never "<>".
            example[name] = f"<{desc[:40]}>" if desc else f"<{name}>"
        elif t in ("integer", "number"):
            example[name] = 0
        else:
            example[name] = f"<{t}>"

    parts.append("")
    parts.append("EXAMPLE JSON OUTPUT:")
    parts.append(json.dumps(example, indent=2, ensure_ascii=False))
    return "\n".join(parts)
class LLMClientDP: class LLMClientDP:
"""Async Deepseek API client for query decomposition. """Async Deepseek API client for query decomposition.
@ -126,17 +173,21 @@ class LLMClientDP:
Deepseek supports ``response_format={"type": "json_object"}`` (which Deepseek supports ``response_format={"type": "json_object"}`` (which
guarantees valid JSON) but not OpenAI's ``json_schema`` mode (which guarantees valid JSON) but not OpenAI's ``json_schema`` mode (which
would validate against a specific schema). We use the JSON mode to would validate against a specific schema). We inject a JSON format
get a guaranteed-valid JSON response, then validate it client-side instruction derived from *pydantic_model* into the prompt (per the
against *pydantic_model*. Deepseek JSON Output guide), then validate client-side.
""" """
prompt_preview = _truncate_prompt_for_log(prompt, first_chars=300, last_chars=100) prompt_preview = _truncate_prompt_for_log(prompt, first_chars=300, last_chars=100)
logger.info("[%s] Deepseek structured request started. Prompt: %s", step_name, prompt_preview) logger.info("[%s] Deepseek structured request started. Prompt: %s", step_name, prompt_preview)
start_time = time.perf_counter() start_time = time.perf_counter()
# Inject JSON format instruction from the Pydantic model.
json_instruction = _pydantic_to_json_instruction(pydantic_model)
full_prompt = f"{prompt}\n\n{json_instruction}"
try: try:
response = await self.complete( response = await self.complete(
prompt=prompt, prompt=full_prompt,
temperature=0.0, temperature=0.0,
step_name=step_name, step_name=step_name,
response_format={"type": "json_object"}, response_format={"type": "json_object"},

View File

@ -191,11 +191,16 @@ class TestLLMClientDPCompleteStructured:
assert result == expected assert result == expected
assert result.questions == ["Q1", "Q2", "Q3"] assert result.questions == ["Q1", "Q2", "Q3"]
mock_complete.assert_called_once() mock_complete.assert_called_once()
# Verify Deepseek JSON mode is used
call_kwargs = mock_complete.call_args.kwargs call_kwargs = mock_complete.call_args.kwargs
assert call_kwargs["response_format"] == {"type": "json_object"} assert call_kwargs["response_format"] == {"type": "json_object"}
assert call_kwargs["temperature"] == 0.0 assert call_kwargs["temperature"] == 0.0
# The prompt must contain the JSON format instruction per Deepseek docs.
full_prompt = call_kwargs["prompt"]
assert "json" in full_prompt.lower()
assert "EXAMPLE JSON OUTPUT" in full_prompt
assert '"questions"' in full_prompt
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_complete_structured_with_markdown_fence(self, client): async def test_complete_structured_with_markdown_fence(self, client):
"""Should strip markdown code fences before JSON parsing.""" """Should strip markdown code fences before JSON parsing."""