diff --git a/backend/app/services/llm_client.py b/backend/app/services/llm_client.py
index 7a5a720..e54c454 100644
--- a/backend/app/services/llm_client.py
+++ b/backend/app/services/llm_client.py
@@ -146,11 +146,16 @@ class LLMClient:
             step_name, prompt_preview, json.dumps(schema)[:300],
         )
 
+        # Merge the thinking-control params so that vLLM structured-output
+        # calls also honor the enable_thinking/vllm_engine configuration.
+        body = self._build_extra_body()
+
         # Try the new unified format first, then legacy guided_json
-        for fmt_name, extra in [
+        for fmt_name, base_extra in [
             ("structured_outputs", {"structured_outputs": {"json": schema}}),
             ("guided_json", {"guided_json": schema}),
         ]:
+            extra = {**base_extra, **body}
             try:
                 self.logger.info("[%s] vLLM structured: trying format=%s extra=%s", step_name, fmt_name, extra)
                 response = await self._client.chat.completions.create(