debug(backend): add LLM request/response logging for OpenRouter debugging

- Log extra_body contents before sending to LLM
- Log full LLM response object for debugging
- Change extra_body format to OpenRouter reasoning format

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-04-23 16:28:43 +08:00 · 2026-04-23 16:28:43 +08:00 · 029a0e490f
parent 33b960f786
commit 029a0e490f
1 changed files with 7 additions and 5 deletions
--- a/backend/app/services/llm_client.py
+++ b/backend/app/services/llm_client.py
@@ -52,7 +52,7 @@ class LLMClient:
        prompt_preview = self._truncate_prompt_for_log(prompt)
        self.logger.info("[%s] LLM request started. Prompt: %s", step_name, prompt_preview)
        start_time = time.perf_counter()
-
+        self.logger.info("LLM Extra Body %s", str(extra_body))
        try:
            response = await self._client.chat.completions.create(
                model=self.model,
@@ -60,6 +60,7 @@ class LLMClient:
                temperature=temperature,
                extra_body=extra_body if extra_body else None,
            )
+            self.logger.info("LLM Response: %s",str(response))
            content = response.choices[0].message.content or ""
            elapsed_ms = (time.perf_counter() - start_time) * 1000
            self.logger.info(
@@ -92,10 +93,11 @@ class LLMClient:
        # Non-thinking mode for Qwen3.5
        # Uses chat_template_kwargs for vLLM/SGLang compatibility.
        # For Alibaba Cloud Model Studio, use top-level enable_thinking instead.
-        return {
-            "chat_template_kwargs": {"enable_thinking": False},
-            "top_k": 20,
-        }
+        # return {
+        #     "chat_template_kwargs": {"enable_thinking": False},
+        #     "top_k": 20,
+        # }
+        return {"reasoning": {"enabled": False}}

    async def close(self):
        await self._client.close()