debug(backend): add LLM request/response logging for OpenRouter debugging

- Log extra_body contents before sending to LLM

- Log full LLM response object for debugging

- Change extra_body format to the OpenRouter reasoning format

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
Woody 2026-04-23 16:28:43 +08:00
parent 33b960f786
commit 029a0e490f
1 changed file with 7 additions and 5 deletions

View File

@ -52,7 +52,7 @@ class LLMClient:
prompt_preview = self._truncate_prompt_for_log(prompt) prompt_preview = self._truncate_prompt_for_log(prompt)
self.logger.info("[%s] LLM request started. Prompt: %s", step_name, prompt_preview) self.logger.info("[%s] LLM request started. Prompt: %s", step_name, prompt_preview)
start_time = time.perf_counter() start_time = time.perf_counter()
self.logger.info("LLM Extra Body %s", str(extra_body))
try: try:
response = await self._client.chat.completions.create( response = await self._client.chat.completions.create(
model=self.model, model=self.model,
@ -60,6 +60,7 @@ class LLMClient:
temperature=temperature, temperature=temperature,
extra_body=extra_body if extra_body else None, extra_body=extra_body if extra_body else None,
) )
self.logger.info("LLM Response: %s",str(response))
content = response.choices[0].message.content or "" content = response.choices[0].message.content or ""
elapsed_ms = (time.perf_counter() - start_time) * 1000 elapsed_ms = (time.perf_counter() - start_time) * 1000
self.logger.info( self.logger.info(
@ -92,10 +93,11 @@ class LLMClient:
# Non-thinking mode for Qwen3.5 # Non-thinking mode for Qwen3.5
# Uses chat_template_kwargs for vLLM/SGLang compatibility. # Uses chat_template_kwargs for vLLM/SGLang compatibility.
# For Alibaba Cloud Model Studio, use top-level enable_thinking instead. # For Alibaba Cloud Model Studio, use top-level enable_thinking instead.
return { # return {
"chat_template_kwargs": {"enable_thinking": False}, # "chat_template_kwargs": {"enable_thinking": False},
"top_k": 20, # "top_k": 20,
} # }
return {"reasoning": {"enabled": False}}
async def close(self): async def close(self):
await self._client.close() await self._client.close()