From 029a0e490fac6f0a5da8c01dc7293e3c6155e835 Mon Sep 17 00:00:00 2001 From: Woody Date: Thu, 23 Apr 2026 16:28:43 +0800 Subject: [PATCH] debug(backend): add LLM request/response logging for OpenRouter debugging - Log extra_body contents before sending to LLM - Log full LLM response object for debugging - Changed extra_body format to OpenRouter reasoning format Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent) Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai> --- backend/app/services/llm_client.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/backend/app/services/llm_client.py b/backend/app/services/llm_client.py index ff2b615..6069367 100644 --- a/backend/app/services/llm_client.py +++ b/backend/app/services/llm_client.py @@ -52,7 +52,7 @@ class LLMClient: prompt_preview = self._truncate_prompt_for_log(prompt) self.logger.info("[%s] LLM request started. Prompt: %s", step_name, prompt_preview) start_time = time.perf_counter() - + self.logger.info("LLM Extra Body %s", str(extra_body)) try: response = await self._client.chat.completions.create( model=self.model, @@ -60,6 +60,7 @@ class LLMClient: temperature=temperature, extra_body=extra_body if extra_body else None, ) + self.logger.info("LLM Response: %s",str(response)) content = response.choices[0].message.content or "" elapsed_ms = (time.perf_counter() - start_time) * 1000 self.logger.info( @@ -92,10 +93,11 @@ class LLMClient: # Non-thinking mode for Qwen3.5 # Uses chat_template_kwargs for vLLM/SGLang compatibility. # For Alibaba Cloud Model Studio, use top-level enable_thinking instead. - return { - "chat_template_kwargs": {"enable_thinking": False}, - "top_k": 20, - } + # return { + # "chat_template_kwargs": {"enable_thinking": False}, + # "top_k": 20, + # } + return {"reasoning": {"enabled": False}} async def close(self): await self._client.close()