fix: add full input/output logging to vLLM structured output path
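Log a truncated prompt preview, the first 300 characters of the JSON schema, the extra_body content for each attempted format, the full API response, token counts, and the full response content as received (before it is validated against the Pydantic model). Add exc_info=True tracebacks on all failure paths.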
This commit is contained in:
parent
3ab6fd102a
commit
16de8394aa
|
|
@ -140,10 +140,10 @@ class LLMClient:
|
||||||
async def _complete_structured_vllm(self, prompt: str, pydantic_model, step_name: str, start_time: float):
|
async def _complete_structured_vllm(self, prompt: str, pydantic_model, step_name: str, start_time: float):
|
||||||
"""Use vLLM's native guided_json via extra_body for structured output."""
|
"""Use vLLM's native guided_json via extra_body for structured output."""
|
||||||
schema = pydantic_model.model_json_schema()
|
schema = pydantic_model.model_json_schema()
|
||||||
|
prompt_preview = self._truncate_prompt_for_log(prompt, first_chars=300, last_chars=100)
|
||||||
self.logger.info(
|
self.logger.info(
|
||||||
"[%s] vLLM structured: sending guided_json schema=%s",
|
"[%s] vLLM structured: prompt=%s schema=%s",
|
||||||
step_name,
|
step_name, prompt_preview, json.dumps(schema)[:300],
|
||||||
json.dumps(schema)[:300],
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# Try the new unified format first, then legacy guided_json
|
# Try the new unified format first, then legacy guided_json
|
||||||
|
|
@ -152,18 +152,22 @@ class LLMClient:
|
||||||
("guided_json", {"guided_json": schema}),
|
("guided_json", {"guided_json": schema}),
|
||||||
]:
|
]:
|
||||||
try:
|
try:
|
||||||
self.logger.info("[%s] vLLM structured: trying format=%s", step_name, fmt_name)
|
self.logger.info("[%s] vLLM structured: trying format=%s extra=%s", step_name, fmt_name, extra)
|
||||||
response = await self._client.chat.completions.create(
|
response = await self._client.chat.completions.create(
|
||||||
model=self.model,
|
model=self.model,
|
||||||
messages=[{"role": "user", "content": prompt}],
|
messages=[{"role": "user", "content": prompt}],
|
||||||
temperature=0.0,
|
temperature=0.0,
|
||||||
extra_body=extra,
|
extra_body=extra,
|
||||||
)
|
)
|
||||||
|
self.logger.info("[%s] vLLM structured full response: %s", step_name, response)
|
||||||
content = response.choices[0].message.content or ""
|
content = response.choices[0].message.content or ""
|
||||||
elapsed_ms = (time.perf_counter() - start_time) * 1000
|
elapsed_ms = (time.perf_counter() - start_time) * 1000
|
||||||
self.logger.info(
|
self.logger.info(
|
||||||
"[%s] vLLM structured succeeded with format=%s in %.2fms. Response: %s",
|
"[%s] vLLM structured succeeded format=%s in %.2fms tokens=(%s/%s). Parsed=%s",
|
||||||
step_name, fmt_name, elapsed_ms, content[:200],
|
step_name, fmt_name, elapsed_ms,
|
||||||
|
response.usage.prompt_tokens if response.usage else "?",
|
||||||
|
response.usage.completion_tokens if response.usage else "?",
|
||||||
|
content,
|
||||||
)
|
)
|
||||||
return pydantic_model.model_validate_json(content)
|
return pydantic_model.model_validate_json(content)
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
|
|
@ -171,6 +175,7 @@ class LLMClient:
|
||||||
self.logger.warning(
|
self.logger.warning(
|
||||||
"[%s] vLLM structured format=%s failed after %.2fms: %s",
|
"[%s] vLLM structured format=%s failed after %.2fms: %s",
|
||||||
step_name, fmt_name, elapsed_ms, exc,
|
step_name, fmt_name, elapsed_ms, exc,
|
||||||
|
exc_info=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
elapsed_ms = (time.perf_counter() - start_time) * 1000
|
elapsed_ms = (time.perf_counter() - start_time) * 1000
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue