debug(backend): add LLM request/response logging for OpenRouter debugging
- Log extra_body contents before sending the request to the LLM
- Log the full LLM response object for debugging
- Change extra_body format to the OpenRouter reasoning format

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
parent
33b960f786
commit
029a0e490f
|
|
@ -52,7 +52,7 @@ class LLMClient:
|
|||
prompt_preview = self._truncate_prompt_for_log(prompt)
|
||||
self.logger.info("[%s] LLM request started. Prompt: %s", step_name, prompt_preview)
|
||||
start_time = time.perf_counter()
|
||||
|
||||
self.logger.info("LLM Extra Body %s", str(extra_body))
|
||||
try:
|
||||
response = await self._client.chat.completions.create(
|
||||
model=self.model,
|
||||
|
|
@ -60,6 +60,7 @@ class LLMClient:
|
|||
temperature=temperature,
|
||||
extra_body=extra_body if extra_body else None,
|
||||
)
|
||||
self.logger.info("LLM Response: %s",str(response))
|
||||
content = response.choices[0].message.content or ""
|
||||
elapsed_ms = (time.perf_counter() - start_time) * 1000
|
||||
self.logger.info(
|
||||
|
|
@ -92,10 +93,11 @@ class LLMClient:
|
|||
# Non-thinking mode for Qwen3.5
|
||||
# Uses chat_template_kwargs for vLLM/SGLang compatibility.
|
||||
# For Alibaba Cloud Model Studio, use top-level enable_thinking instead.
|
||||
return {
|
||||
"chat_template_kwargs": {"enable_thinking": False},
|
||||
"top_k": 20,
|
||||
}
|
||||
# return {
|
||||
# "chat_template_kwargs": {"enable_thinking": False},
|
||||
# "top_k": 20,
|
||||
# }
|
||||
return {"reasoning": {"enabled": False}}
|
||||
|
||||
async def close(self):
|
||||
await self._client.close()
|
||||
|
|
|
|||
Loading…
Reference in New Issue