debug(backend): add LLM request/response logging for OpenRouter debugging
- Log extra_body contents before sending to LLM
- Log full LLM response object for debugging
- Changed extra_body format to OpenRouter reasoning format

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
parent
33b960f786
commit
029a0e490f
|
|
@ -52,7 +52,7 @@ class LLMClient:
|
||||||
prompt_preview = self._truncate_prompt_for_log(prompt)
|
prompt_preview = self._truncate_prompt_for_log(prompt)
|
||||||
self.logger.info("[%s] LLM request started. Prompt: %s", step_name, prompt_preview)
|
self.logger.info("[%s] LLM request started. Prompt: %s", step_name, prompt_preview)
|
||||||
start_time = time.perf_counter()
|
start_time = time.perf_counter()
|
||||||
|
self.logger.info("LLM Extra Body %s", str(extra_body))
|
||||||
try:
|
try:
|
||||||
response = await self._client.chat.completions.create(
|
response = await self._client.chat.completions.create(
|
||||||
model=self.model,
|
model=self.model,
|
||||||
|
|
@ -60,6 +60,7 @@ class LLMClient:
|
||||||
temperature=temperature,
|
temperature=temperature,
|
||||||
extra_body=extra_body if extra_body else None,
|
extra_body=extra_body if extra_body else None,
|
||||||
)
|
)
|
||||||
|
self.logger.info("LLM Response: %s",str(response))
|
||||||
content = response.choices[0].message.content or ""
|
content = response.choices[0].message.content or ""
|
||||||
elapsed_ms = (time.perf_counter() - start_time) * 1000
|
elapsed_ms = (time.perf_counter() - start_time) * 1000
|
||||||
self.logger.info(
|
self.logger.info(
|
||||||
|
|
@ -92,10 +93,11 @@ class LLMClient:
|
||||||
# Non-thinking mode for Qwen3.5
|
# Non-thinking mode for Qwen3.5
|
||||||
# Uses chat_template_kwargs for vLLM/SGLang compatibility.
|
# Uses chat_template_kwargs for vLLM/SGLang compatibility.
|
||||||
# For Alibaba Cloud Model Studio, use top-level enable_thinking instead.
|
# For Alibaba Cloud Model Studio, use top-level enable_thinking instead.
|
||||||
return {
|
# return {
|
||||||
"chat_template_kwargs": {"enable_thinking": False},
|
# "chat_template_kwargs": {"enable_thinking": False},
|
||||||
"top_k": 20,
|
# "top_k": 20,
|
||||||
}
|
# }
|
||||||
|
return {"reasoning": {"enabled": False}}
|
||||||
|
|
||||||
async def close(self):
|
async def close(self):
|
||||||
await self._client.close()
|
await self._client.close()
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue