feat(backend): add embedding client and update LLM client

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent) Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
2026-04-23 13:26:43 +08:00 · 2026-04-23 13:26:43 +08:00 · 38f4c70762
parent 4a22b906e4
commit 38f4c70762
2 changed files with 90 additions and 17 deletions
--- a/backend/app/services/embedding_client.py
+++ b/backend/app/services/embedding_client.py
@ -0,0 +1,52 @@
 import logging
 from typing import List
 import httpx
 from app.core.config import Settings
 logger = logging.getLogger(__name__)
 class EmbeddingClient:
    """Async embedding client for OpenRouter-compatible embeddings API."""
    def __init__(self, settings: Settings):
        self.base_url = settings.embedding_base_url.rstrip("/")
        self.api_key = settings.embedding_api_key or settings.llm_api_key
        self.model = settings.embedding_model
        # Async HTTP client for connection pooling
        self._client: httpx.AsyncClient | None = httpx.AsyncClient(
            base_url=self.base_url,
            timeout=settings.llm_timeout,
            headers={"Authorization": f"Bearer {self.api_key}"},
        )
    async def embed(self, texts: List[str]) -> List[List[float]]:
        if not texts:
            return []
        payload = {"model": self.model, "input": texts}
        try:
            resp = await self._client.post("/embeddings", json=payload)
            resp.raise_for_status()
            data = resp.json()
            # Common OpenAI-like response shapes
            if isinstance(data, dict):
                # OpenRouter/OpenAI style: {"data": [{"embedding": []}, ...]}
                if "data" in data and isinstance(data["data"], list):
                    return [item.get("embedding", []) for item in data["data"]]
                # Alternative: {"embeddings": [[...], [...]]}
                if "embeddings" in data and isinstance(data["embeddings"], list):
                    return data["embeddings"]
            # Fallback: try to extract a flat list
            if isinstance(data, list):
                return data  # type: ignore[return-value]
        except Exception as exc:  # pragma: no cover - network/runtime issues
            logger.error("Embedding API call failed: %s", exc)
            raise
        return []
    async def close(self):
        if self._client:
            await self._client.aclose()
--- a/backend/app/services/llm_client.py
+++ b/backend/app/services/llm_client.py
@ -1,28 +1,49 @@
 import asyncio
 import logging
 from typing import Optional
 import httpx
 from app.core.config import Settings
 class LLMClientError(Exception):
    pass
 class LLMClient:
    """Asynchronous LLM HTTP client with connection pooling."""
    def __init__(self, settings: Settings):
        self.settings = settings
        self.base_url = settings.llm_base_url.rstrip("/")
        self.api_key = settings.llm_api_key
        self.model = settings.llm_model_name
        self.logger = logging.getLogger(__name__)
        # Use a single shared AsyncClient for all requests
        self._client = httpx.AsyncClient(
            base_url=self.base_url,
            timeout=settings.llm_timeout,
            headers={"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"},
        )
-    def complete(self, prompt: str, temperature: float = 0.7) -> str:
+    async def complete(self, prompt: str, temperature: float = 0.7) -> str:
-        response = httpx.post(
+        try:
-            f"{self.base_url}/chat/completions",
+            resp = await self._client.post(
-            headers={
+                "/chat/completions",
                "Authorization": f"Bearer {self.api_key}",
                "Content-Type": "application/json",
            },
                json={
                    "model": self.model,
                    "messages": [{"role": "user", "content": prompt}],
                    "temperature": temperature,
                },
            timeout=60.0,
            )
-        response.raise_for_status()
+            resp.raise_for_status()
-        data = response.json()
+            data = resp.json()
            return data["choices"][0]["message"]["content"]
        except (httpx.TimeoutException, httpx.HTTPStatusError, httpx.RequestError) as exc:
            self.logger.error("LLM API error: %s", exc)
            raise LLMClientError from exc
    async def close(self):
        if self._client:
            await self._client.aclose()