From fc6b5463b5996e1b1650958f7a1c0f16a2cf183e Mon Sep 17 00:00:00 2001
From: Woody
Date: Wed, 29 Apr 2026 21:01:10 +0800
Subject: [PATCH] fix: vLLM structured output missing thinking-control extra_body

---
 backend/app/services/llm_client.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/backend/app/services/llm_client.py b/backend/app/services/llm_client.py
index 7a5a720..e54c454 100644
--- a/backend/app/services/llm_client.py
+++ b/backend/app/services/llm_client.py
@@ -146,11 +146,16 @@ class LLMClient:
             step_name, prompt_preview, json.dumps(schema)[:300],
         )
 
+        # Merge thinking-control params so vLLM structured calls
+        # also respect enable_thinking/vllm_engine config (was missing).
+        body = self._build_extra_body()
+
         # Try the new unified format first, then legacy guided_json
-        for fmt_name, extra in [
+        for fmt_name, base_extra in [
             ("structured_outputs", {"structured_outputs": {"json": schema}}),
             ("guided_json", {"guided_json": schema}),
         ]:
+            extra = {**base_extra, **body}
             try:
                 self.logger.info("[%s] vLLM structured: trying format=%s extra=%s", step_name, fmt_name, extra)
                 response = await self._client.chat.completions.create(