feat(backend): add LLM monitoring with step names, timing, and prompt logging
- LLMClient.complete() now accepts step_name parameter to identify processing step - Logs prompt preview (first 100 + last 100 chars) at INFO level - Logs processing time in milliseconds with token usage stats - Updated QueryDecomposer, RelevanceFilter, and RAGService to pass step names Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent) Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
parent
74cb8b83d5
commit
f5cfe44183
|
|
@ -1,4 +1,5 @@
|
|||
import logging
|
||||
import time
|
||||
from typing import Optional
|
||||
|
||||
import httpx
|
||||
|
|
@ -28,11 +29,30 @@ class LLMClient:
|
|||
),
|
||||
)
|
||||
|
||||
async def complete(self, prompt: str, temperature: float = 0.7) -> str:
|
||||
"""Send a chat completion request with optional thinking control."""
|
||||
def _truncate_prompt_for_log(self, prompt: str, first_chars: int = 100, last_chars: int = 100) -> str:
|
||||
"""Truncate prompt for logging: show first N and last N chars with ellipsis."""
|
||||
if len(prompt) <= first_chars + last_chars:
|
||||
return prompt
|
||||
return f"{prompt[:first_chars]}...({len(prompt) - first_chars - last_chars} chars omitted)...{prompt[-last_chars:]}"
|
||||
|
||||
async def complete(self, prompt: str, temperature: float = 0.7, step_name: str = "LLM") -> str:
|
||||
"""Send a chat completion request with optional thinking control.
|
||||
|
||||
Args:
|
||||
prompt: The prompt to send to the LLM.
|
||||
temperature: Sampling temperature.
|
||||
step_name: Identifier for the processing step (e.g., "QueryDecomposer").
|
||||
|
||||
Returns:
|
||||
The LLM response text.
|
||||
"""
|
||||
messages = [{"role": "user", "content": prompt}]
|
||||
extra_body = self._build_extra_body()
|
||||
|
||||
prompt_preview = self._truncate_prompt_for_log(prompt)
|
||||
self.logger.info("[%s] LLM request started. Prompt: %s", step_name, prompt_preview)
|
||||
start_time = time.perf_counter()
|
||||
|
||||
try:
|
||||
response = await self._client.chat.completions.create(
|
||||
model=self.model,
|
||||
|
|
@ -40,12 +60,23 @@ class LLMClient:
|
|||
temperature=temperature,
|
||||
extra_body=extra_body if extra_body else None,
|
||||
)
|
||||
return response.choices[0].message.content or ""
|
||||
content = response.choices[0].message.content or ""
|
||||
elapsed_ms = (time.perf_counter() - start_time) * 1000
|
||||
self.logger.info(
|
||||
"[%s] LLM request completed in %.2fms (prompt_tokens=%s, completion_tokens=%s)",
|
||||
step_name,
|
||||
elapsed_ms,
|
||||
response.usage.prompt_tokens if response.usage else "?",
|
||||
response.usage.completion_tokens if response.usage else "?",
|
||||
)
|
||||
return content
|
||||
except (APITimeoutError, APIError) as exc:
|
||||
self.logger.error("LLM API error: %s", exc)
|
||||
elapsed_ms = (time.perf_counter() - start_time) * 1000
|
||||
self.logger.error("[%s] LLM API error after %.2fms: %s", step_name, elapsed_ms, exc)
|
||||
raise LLMClientError from exc
|
||||
except Exception as exc:
|
||||
self.logger.error("Unexpected LLM error: %s", exc)
|
||||
elapsed_ms = (time.perf_counter() - start_time) * 1000
|
||||
self.logger.error("[%s] Unexpected LLM error after %.2fms: %s", step_name, elapsed_ms, exc)
|
||||
raise LLMClientError from exc
|
||||
|
||||
def _build_extra_body(self) -> dict:
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ class QueryDecomposer:
|
|||
prompt = f"Given question: '{question}', extract key search keywords as JSON array"
|
||||
|
||||
try:
|
||||
response = await self.llm_client.complete(prompt)
|
||||
response = await self.llm_client.complete(prompt, step_name="QueryDecomposer")
|
||||
except Exception as exc:
|
||||
logger.warning("LLM decomposition failed: %s", exc)
|
||||
return []
|
||||
|
|
|
|||
|
|
@ -112,4 +112,4 @@ class RAGService:
|
|||
f"Answer:"
|
||||
)
|
||||
|
||||
return await self.llm_client.complete(prompt=prompt, temperature=0.3)
|
||||
return await self.llm_client.complete(prompt=prompt, temperature=0.3, step_name="ResponseGeneration")
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@ class RelevanceFilter:
|
|||
|
||||
prompt = self._build_prompt(question, chunks)
|
||||
try:
|
||||
response = await self.llm_client.complete(prompt, temperature=0.0)
|
||||
response = await self.llm_client.complete(prompt, temperature=0.0, step_name="RelevanceFilter")
|
||||
except Exception as exc:
|
||||
logger.error("RelevanceFilter LLM call failed: %s", exc)
|
||||
return []
|
||||
|
|
|
|||
Loading…
Reference in New Issue