# legco_ai_assistant/backend/app/services/llm_client.py
# (file-viewer metadata captured with this copy: 50 lines, 1.6 KiB, Python)

import asyncio
import logging
from typing import Optional
import httpx
from app.core.config import Settings
class LLMClientError(Exception):
    """Error raised by ``LLMClient`` when a request to the LLM API fails."""
class LLMClient:
    """Asynchronous LLM HTTP client with connection pooling.

    One ``httpx.AsyncClient`` is created per instance and reused for every
    request. Call :meth:`close` when finished, or use the instance as an
    async context manager (``async with LLMClient(settings) as client:``),
    so the underlying HTTP client is released.
    """

    def __init__(self, settings: Settings):
        """Build the client from application settings.

        Args:
            settings: Supplies ``llm_base_url``, ``llm_api_key``,
                ``llm_model_name`` and ``llm_timeout``.
        """
        self.settings = settings
        # Normalise the base URL by dropping any trailing slash.
        self.base_url = settings.llm_base_url.rstrip("/")
        self.api_key = settings.llm_api_key
        self.model = settings.llm_model_name
        self.logger = logging.getLogger(__name__)
        # Use a single shared AsyncClient for all requests
        self._client = httpx.AsyncClient(
            base_url=self.base_url,
            timeout=settings.llm_timeout,
            headers={
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json",
            },
        )

    async def __aenter__(self) -> "LLMClient":
        """Return this client so it can be used in ``async with``."""
        return self

    async def __aexit__(self, exc_type, exc, tb) -> None:
        """Close the shared HTTP client when the ``async with`` block exits."""
        await self.close()

    async def complete(self, prompt: str, temperature: float = 0.7) -> str:
        """Send ``prompt`` as a single user message and return the reply text.

        POSTs ``model``, ``messages`` and ``temperature`` to
        ``/chat/completions`` and reads ``choices[0].message.content`` from
        the JSON response.

        Args:
            prompt: Text sent as the content of the only ``user`` message.
            temperature: Sampling temperature forwarded in the request body.

        Returns:
            The ``content`` field of the first choice's message.

        Raises:
            LLMClientError: If the request fails (transport error, timeout,
                or an error HTTP status), or if the response body is not
                JSON or lacks the expected ``choices[0].message.content``
                structure. The original exception is chained as
                ``__cause__``.
        """
        payload = {
            "model": self.model,
            "messages": [{"role": "user", "content": prompt}],
            "temperature": temperature,
        }

        try:
            resp = await self._client.post("/chat/completions", json=payload)
            resp.raise_for_status()
        except httpx.HTTPError as exc:
            # httpx.HTTPError is the common base of RequestError (which
            # includes TimeoutException) and HTTPStatusError.
            self.logger.error("LLM API error: %s", exc)
            raise LLMClientError(f"LLM API request failed: {exc}") from exc

        try:
            data = resp.json()
            return data["choices"][0]["message"]["content"]
        except (ValueError, KeyError, IndexError, TypeError) as exc:
            # A non-JSON body raises ValueError; a payload with a different
            # shape raises KeyError/IndexError/TypeError. Surface both as the
            # client's own error type so callers have one exception to catch.
            self.logger.error("LLM API returned an unexpected response: %r", exc)
            raise LLMClientError(
                "LLM API returned an unexpected response"
            ) from exc

    async def close(self):
        """Close the shared ``httpx.AsyncClient`` and release its resources."""
        if self._client:
            await self._client.aclose()