diff --git a/backend/app/services/embedding_client.py b/backend/app/services/embedding_client.py new file mode 100644 index 0000000..a798b73 --- /dev/null +++ b/backend/app/services/embedding_client.py @@ -0,0 +1,52 @@ +import logging +from typing import List + +import httpx + +from app.core.config import Settings + + +logger = logging.getLogger(__name__) + + +class EmbeddingClient: + """Async embedding client for OpenRouter-compatible embeddings API.""" + + def __init__(self, settings: Settings): + self.base_url = settings.embedding_base_url.rstrip("/") + self.api_key = settings.embedding_api_key or settings.llm_api_key + self.model = settings.embedding_model + # Async HTTP client for connection pooling + self._client: httpx.AsyncClient | None = httpx.AsyncClient( + base_url=self.base_url, + timeout=settings.llm_timeout, + headers={"Authorization": f"Bearer {self.api_key}"}, + ) + + async def embed(self, texts: List[str]) -> List[List[float]]: + if not texts: + return [] + payload = {"model": self.model, "input": texts} + try: + resp = await self._client.post("/embeddings", json=payload) + resp.raise_for_status() + data = resp.json() + # Common OpenAI-like response shapes + if isinstance(data, dict): + # OpenRouter/OpenAI style: {"data": [{"embedding": []}, ...]} + if "data" in data and isinstance(data["data"], list): + return [item.get("embedding", []) for item in data["data"]] + # Alternative: {"embeddings": [[...], [...]]} + if "embeddings" in data and isinstance(data["embeddings"], list): + return data["embeddings"] + # Fallback: try to extract a flat list + if isinstance(data, list): + return data # type: ignore[return-value] + except Exception as exc: # pragma: no cover - network/runtime issues + logger.error("Embedding API call failed: %s", exc) + raise + return [] + + async def close(self): + if self._client: + await self._client.aclose() diff --git a/backend/app/services/llm_client.py b/backend/app/services/llm_client.py index 0ccd29c..f91c6d7 100644 --- a/backend/app/services/llm_client.py +++ b/backend/app/services/llm_client.py @@ -1,28 +1,49 @@ +import asyncio +import logging +from typing import Optional + import httpx from app.core.config import Settings +class LLMClientError(Exception): + pass + + class LLMClient: + """Asynchronous LLM HTTP client with connection pooling.""" + def __init__(self, settings: Settings): + self.settings = settings self.base_url = settings.llm_base_url.rstrip("/") self.api_key = settings.llm_api_key self.model = settings.llm_model_name - - def complete(self, prompt: str, temperature: float = 0.7) -> str: - response = httpx.post( - f"{self.base_url}/chat/completions", - headers={ - "Authorization": f"Bearer {self.api_key}", - "Content-Type": "application/json", - }, - json={ - "model": self.model, - "messages": [{"role": "user", "content": prompt}], - "temperature": temperature, - }, - timeout=60.0, + self.logger = logging.getLogger(__name__) + # Use a single shared AsyncClient for all requests + self._client = httpx.AsyncClient( + base_url=self.base_url, + timeout=settings.llm_timeout, + headers={"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}, ) - response.raise_for_status() - data = response.json() - return data["choices"][0]["message"]["content"] + + async def complete(self, prompt: str, temperature: float = 0.7) -> str: + try: + resp = await self._client.post( + "/chat/completions", + json={ + "model": self.model, + "messages": [{"role": "user", "content": prompt}], + "temperature": temperature, + }, + ) + resp.raise_for_status() + data = resp.json() + return data["choices"][0]["message"]["content"] + except (httpx.TimeoutException, httpx.HTTPStatusError, httpx.RequestError) as exc: + self.logger.error("LLM API error: %s", exc) + raise LLMClientError from exc + + async def close(self): + if self._client: + await self._client.aclose()