diff --git a/backend/app/services/llm_client.py b/backend/app/services/llm_client.py
index b0b8154..7a5a720 100644
--- a/backend/app/services/llm_client.py
+++ b/backend/app/services/llm_client.py
@@ -140,10 +140,10 @@ class LLMClient:
     async def _complete_structured_vllm(self, prompt: str, pydantic_model, step_name: str, start_time: float):
         """Use vLLM's native guided_json via extra_body for structured output."""
         schema = pydantic_model.model_json_schema()
+        prompt_preview = self._truncate_prompt_for_log(prompt, first_chars=300, last_chars=100)
         self.logger.info(
-            "[%s] vLLM structured: sending guided_json schema=%s",
-            step_name,
-            json.dumps(schema)[:300],
+            "[%s] vLLM structured: prompt=%s schema=%s",
+            step_name, prompt_preview, json.dumps(schema)[:300],
         )
 
         # Try the new unified format first, then legacy guided_json
@@ -152,18 +152,22 @@ class LLMClient:
             ("guided_json", {"guided_json": schema}),
         ]:
             try:
-                self.logger.info("[%s] vLLM structured: trying format=%s", step_name, fmt_name)
+                self.logger.info("[%s] vLLM structured: trying format=%s extra=%s", step_name, fmt_name, extra)
                 response = await self._client.chat.completions.create(
                     model=self.model,
                     messages=[{"role": "user", "content": prompt}],
                     temperature=0.0,
                     extra_body=extra,
                 )
+                self.logger.info("[%s] vLLM structured full response: %s", step_name, response)
                 content = response.choices[0].message.content or ""
                 elapsed_ms = (time.perf_counter() - start_time) * 1000
                 self.logger.info(
-                    "[%s] vLLM structured succeeded with format=%s in %.2fms. Response: %s",
-                    step_name, fmt_name, elapsed_ms, content[:200],
+                    "[%s] vLLM structured succeeded format=%s in %.2fms tokens=(%s/%s). Parsed=%s",
+                    step_name, fmt_name, elapsed_ms,
+                    response.usage.prompt_tokens if response.usage else "?",
+                    response.usage.completion_tokens if response.usage else "?",
+                    content,
                 )
                 return pydantic_model.model_validate_json(content)
             except Exception as exc:
@@ -171,6 +175,7 @@ class LLMClient:
                 self.logger.warning(
                     "[%s] vLLM structured format=%s failed after %.2fms: %s",
                     step_name, fmt_name, elapsed_ms, exc,
+                    exc_info=True,
                 )
 
         elapsed_ms = (time.perf_counter() - start_time) * 1000