diff --git a/backend/app/services/llm_client.py b/backend/app/services/llm_client.py index 7a5a720..e54c454 100644 --- a/backend/app/services/llm_client.py +++ b/backend/app/services/llm_client.py @@ -146,11 +146,16 @@ class LLMClient: step_name, prompt_preview, json.dumps(schema)[:300], ) + # Merge thinking-control params so vLLM structured calls + # also respect enable_thinking/vllm_engine config (was missing). + body = self._build_extra_body() + # Try the new unified format first, then legacy guided_json - for fmt_name, extra in [ + for fmt_name, base_extra in [ ("structured_outputs", {"structured_outputs": {"json": schema}}), ("guided_json", {"guided_json": schema}), ]: + extra = {**base_extra, **body} try: self.logger.info("[%s] vLLM structured: trying format=%s extra=%s", step_name, fmt_name, extra) response = await self._client.chat.completions.create(