50 lines
1.6 KiB
Python
50 lines
1.6 KiB
Python
import asyncio
|
|
import logging
|
|
from typing import Optional
|
|
|
|
import httpx
|
|
|
|
from app.core.config import Settings
|
|
|
|
|
|
class LLMClientError(Exception):
    """Raised by ``LLMClient`` when a call to the LLM API fails."""
|
|
|
|
|
|
class LLMClient:
    """Asynchronous LLM HTTP client with connection pooling.

    One ``httpx.AsyncClient`` is created in ``__init__`` and reused for every
    request. Release it with ``await client.close()`` or by using the
    instance as an async context manager::

        async with LLMClient(settings) as client:
            text = await client.complete("Hello")
    """

    def __init__(self, settings: "Settings"):
        """Configure the client from application settings.

        Args:
            settings: Object providing ``llm_base_url``, ``llm_api_key``,
                ``llm_model_name`` and ``llm_timeout``.
        """
        self.settings = settings
        # Strip trailing slashes so request paths such as "/chat/completions"
        # join onto the base URL without producing a double slash.
        self.base_url = settings.llm_base_url.rstrip("/")
        self.api_key = settings.llm_api_key
        self.model = settings.llm_model_name
        self.logger = logging.getLogger(__name__)
        # Use a single shared AsyncClient for all requests
        self._client = httpx.AsyncClient(
            base_url=self.base_url,
            timeout=settings.llm_timeout,
            headers={
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json",
            },
        )

    async def __aenter__(self) -> "LLMClient":
        """Return the client itself for use in ``async with``."""
        return self

    async def __aexit__(self, exc_type, exc, tb) -> None:
        """Close the shared HTTP client when the ``async with`` block exits."""
        await self.close()

    async def complete(self, prompt: str, temperature: float = 0.7) -> str:
        """Send *prompt* as a single user message and return the reply text.

        Args:
            prompt: Content of the user message.
            temperature: Sampling temperature forwarded in the request body.

        Returns:
            The ``choices[0].message.content`` field of the JSON response.

        Raises:
            LLMClientError: If the request fails (timeout, transport error or
                error status code), or if the response body is not JSON of
                the expected shape. The underlying exception is chained as
                ``__cause__``.
        """
        payload = {
            "model": self.model,
            "messages": [{"role": "user", "content": prompt}],
            "temperature": temperature,
        }

        try:
            resp = await self._client.post("/chat/completions", json=payload)
            resp.raise_for_status()
        except httpx.HTTPError as exc:
            # httpx.HTTPError is the common base of TimeoutException,
            # RequestError and HTTPStatusError.
            self.logger.error("LLM API error: %s", exc)
            raise LLMClientError(f"LLM API request failed: {exc}") from exc

        try:
            return resp.json()["choices"][0]["message"]["content"]
        except (ValueError, KeyError, IndexError, TypeError) as exc:
            # ValueError: body is not valid JSON. The others: the JSON does
            # not have the choices/message/content structure. Without this,
            # callers would see raw parsing errors instead of LLMClientError.
            self.logger.error("LLM API returned an unexpected response: %r", exc)
            raise LLMClientError(
                "LLM API returned an unexpected response"
            ) from exc

    async def close(self) -> None:
        """Close the shared HTTP client, if one is set."""
        if self._client is not None:
            await self._client.aclose()
|