# legco_ai_assistant/backend/app/services/llm_client.py

import asyncio
import logging
from typing import Optional

import httpx

from app.core.config import Settings


class LLMClientError(Exception):
    """Error raised by ``LLMClient`` when a call to the LLM API fails.

    The underlying cause is attached via exception chaining
    (``__cause__``).
    """


class LLMClient:
    """Asynchronous LLM HTTP client with connection pooling."""

    def __init__(self, settings: Settings):
        self.settings = settings
        self.base_url = settings.llm_base_url.rstrip("/")
        self.api_key = settings.llm_api_key
        self.model = settings.llm_model_name
        self.logger = logging.getLogger(__name__)
        # Use a single shared AsyncClient for all requests
        self._client = httpx.AsyncClient(
            base_url=self.base_url,
            timeout=settings.llm_timeout,
            headers={"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"},
        )

    async def complete(self, prompt: str, temperature: float = 0.7) -> str:
        try:
            resp = await self._client.post(
                "/chat/completions",
                json={
                    "model": self.model,
                    "messages": [{"role": "user", "content": prompt}],
                    "temperature": temperature,
                },
            )
            resp.raise_for_status()
            data = resp.json()
            return data["choices"][0]["message"]["content"]
        except (httpx.TimeoutException, httpx.HTTPStatusError, httpx.RequestError) as exc:
            self.logger.error("LLM API error: %s", exc)
            raise LLMClientError from exc

    async def close(self):
        if self._client:
            await self._client.aclose()