From f5cfe44183724280a1bac248421973dc1574f808 Mon Sep 17 00:00:00 2001 From: Woody Date: Thu, 23 Apr 2026 14:51:57 +0800 Subject: [PATCH] feat(backend): add LLM monitoring with step names, timing, and prompt logging - LLMClient.complete() now accepts step_name parameter to identify processing step - Logs prompt preview (first 100 + last 100 chars) at INFO level - Logs processing time in milliseconds with token usage stats - Updated QueryDecomposer, RelevanceFilter, and RAGService to pass step names Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent) Co-authored-by: Sisyphus --- backend/app/services/llm_client.py | 41 +++++++++++++++++++++--- backend/app/services/query_decomposer.py | 2 +- backend/app/services/rag.py | 2 +- backend/app/services/relevance_filter.py | 2 +- 4 files changed, 39 insertions(+), 8 deletions(-) diff --git a/backend/app/services/llm_client.py b/backend/app/services/llm_client.py index ef8a30b..ff2b615 100644 --- a/backend/app/services/llm_client.py +++ b/backend/app/services/llm_client.py @@ -1,4 +1,5 @@ import logging +import time from typing import Optional import httpx @@ -28,11 +29,30 @@ class LLMClient: ), ) - async def complete(self, prompt: str, temperature: float = 0.7) -> str: - """Send a chat completion request with optional thinking control.""" + def _truncate_prompt_for_log(self, prompt: str, first_chars: int = 100, last_chars: int = 100) -> str: + """Truncate prompt for logging: show first N and last N chars with ellipsis.""" + if len(prompt) <= first_chars + last_chars: + return prompt + return f"{prompt[:first_chars]}...({len(prompt) - first_chars - last_chars} chars omitted)...{prompt[-last_chars:]}" + + async def complete(self, prompt: str, temperature: float = 0.7, step_name: str = "LLM") -> str: + """Send a chat completion request with optional thinking control. + + Args: + prompt: The prompt to send to the LLM. + temperature: Sampling temperature. + step_name: Identifier for the processing step (e.g., "QueryDecomposer"). + + Returns: + The LLM response text. + """ messages = [{"role": "user", "content": prompt}] extra_body = self._build_extra_body() + prompt_preview = self._truncate_prompt_for_log(prompt) + self.logger.info("[%s] LLM request started. Prompt: %s", step_name, prompt_preview) + start_time = time.perf_counter() + try: response = await self._client.chat.completions.create( model=self.model, @@ -40,12 +60,23 @@ class LLMClient: temperature=temperature, extra_body=extra_body if extra_body else None, ) - return response.choices[0].message.content or "" + content = response.choices[0].message.content or "" + elapsed_ms = (time.perf_counter() - start_time) * 1000 + self.logger.info( + "[%s] LLM request completed in %.2fms (prompt_tokens=%s, completion_tokens=%s)", + step_name, + elapsed_ms, + response.usage.prompt_tokens if response.usage else "?", + response.usage.completion_tokens if response.usage else "?", + ) + return content except (APITimeoutError, APIError) as exc: - self.logger.error("LLM API error: %s", exc) + elapsed_ms = (time.perf_counter() - start_time) * 1000 + self.logger.error("[%s] LLM API error after %.2fms: %s", step_name, elapsed_ms, exc) raise LLMClientError from exc except Exception as exc: - self.logger.error("Unexpected LLM error: %s", exc) + elapsed_ms = (time.perf_counter() - start_time) * 1000 + self.logger.error("[%s] Unexpected LLM error after %.2fms: %s", step_name, elapsed_ms, exc) raise LLMClientError from exc def _build_extra_body(self) -> dict: diff --git a/backend/app/services/query_decomposer.py b/backend/app/services/query_decomposer.py index 2895011..42a63dc 100644 --- a/backend/app/services/query_decomposer.py +++ b/backend/app/services/query_decomposer.py @@ -43,7 +43,7 @@ class QueryDecomposer: prompt = f"Given question: '{question}', extract key search keywords as JSON array" try: - response = await self.llm_client.complete(prompt) + response = await self.llm_client.complete(prompt, step_name="QueryDecomposer") except Exception as exc: logger.warning("LLM decomposition failed: %s", exc) return [] diff --git a/backend/app/services/rag.py b/backend/app/services/rag.py index f8ed3f7..c1a0b68 100644 --- a/backend/app/services/rag.py +++ b/backend/app/services/rag.py @@ -112,4 +112,4 @@ class RAGService: f"Answer:" ) - return await self.llm_client.complete(prompt=prompt, temperature=0.3) + return await self.llm_client.complete(prompt=prompt, temperature=0.3, step_name="ResponseGeneration") diff --git a/backend/app/services/relevance_filter.py b/backend/app/services/relevance_filter.py index 4a82f04..e5d933c 100644 --- a/backend/app/services/relevance_filter.py +++ b/backend/app/services/relevance_filter.py @@ -36,7 +36,7 @@ class RelevanceFilter: prompt = self._build_prompt(question, chunks) try: - response = await self.llm_client.complete(prompt, temperature=0.0) + response = await self.llm_client.complete(prompt, temperature=0.0, step_name="RelevanceFilter") except Exception as exc: logger.error("RelevanceFilter LLM call failed: %s", exc) return []