feat(backend): add embedding client and update LLM client

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
Woody 2026-04-23 13:26:43 +08:00
parent 4a22b906e4
commit 38f4c70762
2 changed files with 90 additions and 17 deletions

View File

@ -0,0 +1,52 @@
import logging
from typing import List
import httpx
from app.core.config import Settings
logger = logging.getLogger(__name__)
class EmbeddingClient:
"""Async embedding client for OpenRouter-compatible embeddings API."""
def __init__(self, settings: Settings):
self.base_url = settings.embedding_base_url.rstrip("/")
self.api_key = settings.embedding_api_key or settings.llm_api_key
self.model = settings.embedding_model
# Async HTTP client for connection pooling
self._client: httpx.AsyncClient | None = httpx.AsyncClient(
base_url=self.base_url,
timeout=settings.llm_timeout,
headers={"Authorization": f"Bearer {self.api_key}"},
)
async def embed(self, texts: List[str]) -> List[List[float]]:
if not texts:
return []
payload = {"model": self.model, "input": texts}
try:
resp = await self._client.post("/embeddings", json=payload)
resp.raise_for_status()
data = resp.json()
# Common OpenAI-like response shapes
if isinstance(data, dict):
# OpenRouter/OpenAI style: {"data": [{"embedding": []}, ...]}
if "data" in data and isinstance(data["data"], list):
return [item.get("embedding", []) for item in data["data"]]
# Alternative: {"embeddings": [[...], [...]]}
if "embeddings" in data and isinstance(data["embeddings"], list):
return data["embeddings"]
# Fallback: try to extract a flat list
if isinstance(data, list):
return data # type: ignore[return-value]
except Exception as exc: # pragma: no cover - network/runtime issues
logger.error("Embedding API call failed: %s", exc)
raise
return []
async def close(self):
if self._client:
await self._client.aclose()

View File

@ -1,28 +1,49 @@
import asyncio
import logging
from typing import Optional
import httpx
from app.core.config import Settings
class LLMClientError(Exception):
pass
class LLMClient:
"""Asynchronous LLM HTTP client with connection pooling."""
def __init__(self, settings: Settings):
self.settings = settings
self.base_url = settings.llm_base_url.rstrip("/")
self.api_key = settings.llm_api_key
self.model = settings.llm_model_name
self.logger = logging.getLogger(__name__)
# Use a single shared AsyncClient for all requests
self._client = httpx.AsyncClient(
base_url=self.base_url,
timeout=settings.llm_timeout,
headers={"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"},
)
def complete(self, prompt: str, temperature: float = 0.7) -> str:
response = httpx.post(
f"{self.base_url}/chat/completions",
headers={
"Authorization": f"Bearer {self.api_key}",
"Content-Type": "application/json",
},
async def complete(self, prompt: str, temperature: float = 0.7) -> str:
try:
resp = await self._client.post(
"/chat/completions",
json={
"model": self.model,
"messages": [{"role": "user", "content": prompt}],
"temperature": temperature,
},
timeout=60.0,
)
response.raise_for_status()
data = response.json()
resp.raise_for_status()
data = resp.json()
return data["choices"][0]["message"]["content"]
except (httpx.TimeoutException, httpx.HTTPStatusError, httpx.RequestError) as exc:
self.logger.error("LLM API error: %s", exc)
raise LLMClientError from exc
async def close(self):
if self._client:
await self._client.aclose()