import asyncio
import logging
from typing import Optional

import httpx

from app.core.config import Settings


class LLMClientError(Exception):
    """Raised when an LLM API request fails or its response cannot be used."""


class LLMClient:
    """Asynchronous LLM HTTP client with connection pooling.

    One ``httpx.AsyncClient`` is created per ``LLMClient`` instance and reused
    for every request made through it. Call :meth:`close` when the client is
    no longer needed to release that underlying HTTP client.
    """

    def __init__(self, settings: Settings) -> None:
        """Build the client from application settings.

        Reads ``llm_base_url``, ``llm_api_key``, ``llm_model_name`` and
        ``llm_timeout`` from *settings*.
        """
        self.settings = settings
        # Trailing slashes are stripped from the configured base URL.
        self.base_url = settings.llm_base_url.rstrip("/")
        self.api_key = settings.llm_api_key
        self.model = settings.llm_model_name
        self.logger = logging.getLogger(__name__)
        # Use a single shared AsyncClient for all requests
        self._client = httpx.AsyncClient(
            base_url=self.base_url,
            timeout=settings.llm_timeout,
            headers={
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json",
            },
        )

    async def complete(self, prompt: str, temperature: float = 0.7) -> str:
        """Send *prompt* as a single user message and return the reply content.

        Args:
            prompt: Text sent as the content of the only ``user`` message.
            temperature: Value forwarded in the request's ``temperature`` field.

        Returns:
            The value found at ``choices[0].message.content`` in the JSON
            response body.

        Raises:
            LLMClientError: If the request times out, fails at the transport
                level, returns an error HTTP status, or if the response body
                is not valid JSON or lacks the expected structure. The
                original exception is attached as ``__cause__``.
        """
        payload = {
            "model": self.model,
            "messages": [{"role": "user", "content": prompt}],
            "temperature": temperature,
        }

        try:
            resp = await self._client.post("/chat/completions", json=payload)
            resp.raise_for_status()
        except (
            httpx.TimeoutException,
            httpx.HTTPStatusError,
            httpx.RequestError,
        ) as exc:
            self.logger.error("LLM API error: %s", exc)
            raise LLMClientError(f"LLM API request failed: {exc}") from exc

        # Parsing is guarded separately: a 2xx response can still carry a
        # non-JSON body or an unexpected payload shape, and those failures
        # should reach callers as LLMClientError rather than as raw
        # ValueError/KeyError/IndexError/TypeError.
        try:
            data = resp.json()
            return data["choices"][0]["message"]["content"]
        except (ValueError, KeyError, IndexError, TypeError) as exc:
            self.logger.error("LLM API returned a malformed response: %r", exc)
            raise LLMClientError(
                f"Malformed LLM API response: {exc!r}"
            ) from exc

    async def close(self) -> None:
        """Close the underlying HTTP client, if one is set."""
        if self._client:
            await self._client.aclose()