diff --git a/backend/app/core/sqlite_db.py b/backend/app/core/sqlite_db.py
index 0bcd724..73b1d96 100644
--- a/backend/app/core/sqlite_db.py
+++ b/backend/app/core/sqlite_db.py
@@ -13,7 +13,8 @@ _SEED_DECOMPOSE = (
     "Given this question: '{question}'\n\n"
     "Break it down into 2-5 simplified sub-questions that would help "
     "search for relevant information. Each sub-question should be short "
-    "and focused on one aspect."
+    "and focused on one aspect.\n\n"
+    'Return a JSON array of strings: ["sub-question 1", "sub-question 2", ...]'
 )
 
 _SEED_FILTER = (
diff --git a/backend/app/services/llm_client.py b/backend/app/services/llm_client.py
index 1672997..b0b8154 100644
--- a/backend/app/services/llm_client.py
+++ b/backend/app/services/llm_client.py
@@ -1,7 +1,8 @@
+import json
 import logging
 import os
 import time
-from typing import Optional
+from typing import Any, Optional
 
 import httpx
 from openai import AsyncOpenAI, APIError, APITimeoutError
@@ -131,17 +132,60 @@ class LLMClient:
         self.logger.info("[%s] Structured LLM request started. Prompt: %s", step_name, prompt_preview)
         start_time = time.perf_counter()
 
-        extra_body = self._build_extra_body()
-        self.logger.info("[%s] Structured LLM Extra Body: %s", step_name, str(extra_body))
+        if self.settings.vllm_engine:
+            return await self._complete_structured_vllm(prompt, pydantic_model, step_name, start_time)
 
+        return await self._complete_structured_openai(prompt, pydantic_model, step_name, start_time)
+
+    async def _complete_structured_vllm(self, prompt: str, pydantic_model, step_name: str, start_time: float):
+        """Request schema-constrained JSON from vLLM and validate it against ``pydantic_model``.
+
+        Tries ``structured_outputs`` first, then legacy ``guided_json``; raises LLMClientError if both fail.
+        """
+        schema = pydantic_model.model_json_schema()
+        self.logger.info(
+            "[%s] vLLM structured: sending guided_json schema=%s", step_name, json.dumps(schema)[:300],
+        )
+
+        last_exc = None  # `exc` is unbound once its except block exits, so remember the last failure here
+        for fmt_name, extra in [
+            ("structured_outputs", {"structured_outputs": {"json": schema}}),
+            ("guided_json", {"guided_json": schema}),
+        ]:
+            try:
+                self.logger.info("[%s] vLLM structured: trying format=%s", step_name, fmt_name)
+                response = await self._client.chat.completions.create(
+                    model=self.model,
+                    messages=[{"role": "user", "content": prompt}],
+                    temperature=0.0,
+                    extra_body=extra,
+                )
+                content = response.choices[0].message.content or ""
+                elapsed_ms = (time.perf_counter() - start_time) * 1000
+                self.logger.info(
+                    "[%s] vLLM structured succeeded with format=%s in %.2fms. Response: %s",
+                    step_name, fmt_name, elapsed_ms, content[:200],
+                )
+                return pydantic_model.model_validate_json(content)
+            except Exception as exc:
+                last_exc = exc
+                elapsed_ms = (time.perf_counter() - start_time) * 1000
+                self.logger.warning(
+                    "[%s] vLLM structured format=%s failed after %.2fms: %s", step_name, fmt_name, elapsed_ms, exc,
+                )
+
+        elapsed_ms = (time.perf_counter() - start_time) * 1000
+        self.logger.error(
+            "[%s] vLLM structured: all formats failed after %.2fms", step_name, elapsed_ms, exc_info=last_exc,
+        )
+        raise LLMClientError("vLLM structured output failed with all guided formats") from last_exc
+
+    async def _complete_structured_openai(self, prompt: str, pydantic_model, step_name: str, start_time: float):
+        """Use OpenAI-native json_schema via LangChain's with_structured_output()."""
         try:
             model = self._get_langchain_model()
-            # vLLM servers may not support json_schema response_format. Use
-            # function_calling instead, which is more widely supported by
-            # open-source models served through vLLM.
-            method = "function_calling" if self.settings.vllm_engine else "json_schema"
-            self.logger.info("[%s] Structured output method: %s", step_name, method)
-            structured = model.with_structured_output(pydantic_model, method=method)
+            self.logger.info("[%s] Structured output method: json_schema (OpenAI-native)", step_name)
+            structured = model.with_structured_output(pydantic_model, method="json_schema")
             result = await structured.ainvoke(prompt)
 
             elapsed_ms = (time.perf_counter() - start_time) * 1000
@@ -158,5 +202,9 @@ class LLMClient:
             return result
         except Exception as exc:
             elapsed_ms = (time.perf_counter() - start_time) * 1000
-            self.logger.error("[%s] Structured LLM error after %.2fms: %s", step_name, elapsed_ms, exc)
+            self.logger.error(
+                "[%s] Structured LLM error after %.2fms: %s",
+                step_name, elapsed_ms, exc,
+                exc_info=True,
+            )
             raise LLMClientError from exc
diff --git a/backend/app/services/query_decomposer.py b/backend/app/services/query_decomposer.py
index 4b45093..e7293e6 100644
--- a/backend/app/services/query_decomposer.py
+++ b/backend/app/services/query_decomposer.py
@@ -41,10 +41,12 @@ def _extract_json_from_markdown(response: str) -> str:
 
 
 def _parse_legacy_json(response: str) -> List[str]:
-    response = _extract_json_from_markdown(response)
+    extracted = _extract_json_from_markdown(response)
+    logger.info("Legacy JSON parse: extracted text (first 300 chars): %s", extracted[:300] if extracted else "(empty)")
     try:
-        data = json.loads(response)
+        data = json.loads(extracted)
     except json.JSONDecodeError:
+        logger.warning("Legacy JSON parse: json.loads failed on extracted text")
         return []
 
     if not isinstance(data, list):
@@ -90,6 +92,7 @@ class QueryDecomposer:
 
         if self._prompt_service is not None:
             template = self._prompt_service.get_prompt_template("decompose")
+            logger.info("Decompose prompt template (first 200 chars): %s", template[:200] if template else "(empty)")
         else:
             template = _BUILTIN_DECOMPOSE_TEMPLATE