From 029a0e490fac6f0a5da8c01dc7293e3c6155e835 Mon Sep 17 00:00:00 2001
From: Woody <woody.ck.tse@gmail.com>
Date: Thu, 23 Apr 2026 16:28:43 +0800
Subject: [PATCH] debug(backend): add LLM request/response logging for
 OpenRouter debugging

- Log extra_body contents before sending to LLM

- Log full LLM response object for debugging

- Change extra_body format to the OpenRouter reasoning format

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
---
 backend/app/services/llm_client.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/backend/app/services/llm_client.py b/backend/app/services/llm_client.py
index ff2b615..6069367 100644
--- a/backend/app/services/llm_client.py
+++ b/backend/app/services/llm_client.py
@@ -52,7 +52,7 @@ class LLMClient:
         prompt_preview = self._truncate_prompt_for_log(prompt)
         self.logger.info("[%s] LLM request started. Prompt: %s", step_name, prompt_preview)
         start_time = time.perf_counter()
-
+        self.logger.info("LLM Extra Body %s", str(extra_body))
         try:
             response = await self._client.chat.completions.create(
                 model=self.model,
@@ -60,6 +60,7 @@ class LLMClient:
                 temperature=temperature,
                 extra_body=extra_body if extra_body else None,
             )
+            self.logger.info("LLM Response: %s",str(response))
             content = response.choices[0].message.content or ""
             elapsed_ms = (time.perf_counter() - start_time) * 1000
             self.logger.info(
@@ -92,10 +93,11 @@ class LLMClient:
         # Non-thinking mode for Qwen3.5
         # Uses chat_template_kwargs for vLLM/SGLang compatibility.
         # For Alibaba Cloud Model Studio, use top-level enable_thinking instead.
-        return {
-            "chat_template_kwargs": {"enable_thinking": False},
-            "top_k": 20,
-        }
+        # return {
+        #     "chat_template_kwargs": {"enable_thinking": False},
+        #     "top_k": 20,
+        # }
+        return {"reasoning": {"enabled": False}}
 
     async def close(self):
         await self._client.close()