feat(backend): add embedding client and update LLM client
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent) Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
parent
4a22b906e4
commit
38f4c70762
|
|
@ -0,0 +1,52 @@
|
|||
import logging
|
||||
from typing import List
|
||||
|
||||
import httpx
|
||||
|
||||
from app.core.config import Settings
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class EmbeddingClient:
|
||||
"""Async embedding client for OpenRouter-compatible embeddings API."""
|
||||
|
||||
def __init__(self, settings: Settings):
|
||||
self.base_url = settings.embedding_base_url.rstrip("/")
|
||||
self.api_key = settings.embedding_api_key or settings.llm_api_key
|
||||
self.model = settings.embedding_model
|
||||
# Async HTTP client for connection pooling
|
||||
self._client: httpx.AsyncClient | None = httpx.AsyncClient(
|
||||
base_url=self.base_url,
|
||||
timeout=settings.llm_timeout,
|
||||
headers={"Authorization": f"Bearer {self.api_key}"},
|
||||
)
|
||||
|
||||
async def embed(self, texts: List[str]) -> List[List[float]]:
|
||||
if not texts:
|
||||
return []
|
||||
payload = {"model": self.model, "input": texts}
|
||||
try:
|
||||
resp = await self._client.post("/embeddings", json=payload)
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
# Common OpenAI-like response shapes
|
||||
if isinstance(data, dict):
|
||||
# OpenRouter/OpenAI style: {"data": [{"embedding": []}, ...]}
|
||||
if "data" in data and isinstance(data["data"], list):
|
||||
return [item.get("embedding", []) for item in data["data"]]
|
||||
# Alternative: {"embeddings": [[...], [...]]}
|
||||
if "embeddings" in data and isinstance(data["embeddings"], list):
|
||||
return data["embeddings"]
|
||||
# Fallback: try to extract a flat list
|
||||
if isinstance(data, list):
|
||||
return data # type: ignore[return-value]
|
||||
except Exception as exc: # pragma: no cover - network/runtime issues
|
||||
logger.error("Embedding API call failed: %s", exc)
|
||||
raise
|
||||
return []
|
||||
|
||||
async def close(self):
|
||||
if self._client:
|
||||
await self._client.aclose()
|
||||
|
|
@ -1,28 +1,49 @@
|
|||
import asyncio
|
||||
import logging
|
||||
from typing import Optional
|
||||
|
||||
import httpx
|
||||
|
||||
from app.core.config import Settings
|
||||
|
||||
|
||||
class LLMClientError(Exception):
|
||||
pass
|
||||
|
||||
|
||||
class LLMClient:
|
||||
"""Asynchronous LLM HTTP client with connection pooling."""
|
||||
|
||||
def __init__(self, settings: Settings):
|
||||
self.settings = settings
|
||||
self.base_url = settings.llm_base_url.rstrip("/")
|
||||
self.api_key = settings.llm_api_key
|
||||
self.model = settings.llm_model_name
|
||||
self.logger = logging.getLogger(__name__)
|
||||
# Use a single shared AsyncClient for all requests
|
||||
self._client = httpx.AsyncClient(
|
||||
base_url=self.base_url,
|
||||
timeout=settings.llm_timeout,
|
||||
headers={"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"},
|
||||
)
|
||||
|
||||
def complete(self, prompt: str, temperature: float = 0.7) -> str:
|
||||
response = httpx.post(
|
||||
f"{self.base_url}/chat/completions",
|
||||
headers={
|
||||
"Authorization": f"Bearer {self.api_key}",
|
||||
"Content-Type": "application/json",
|
||||
},
|
||||
async def complete(self, prompt: str, temperature: float = 0.7) -> str:
|
||||
try:
|
||||
resp = await self._client.post(
|
||||
"/chat/completions",
|
||||
json={
|
||||
"model": self.model,
|
||||
"messages": [{"role": "user", "content": prompt}],
|
||||
"temperature": temperature,
|
||||
},
|
||||
timeout=60.0,
|
||||
)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
resp.raise_for_status()
|
||||
data = resp.json()
|
||||
return data["choices"][0]["message"]["content"]
|
||||
except (httpx.TimeoutException, httpx.HTTPStatusError, httpx.RequestError) as exc:
|
||||
self.logger.error("LLM API error: %s", exc)
|
||||
raise LLMClientError from exc
|
||||
|
||||
async def close(self):
|
||||
if self._client:
|
||||
await self._client.aclose()
|
||||
|
|
|
|||
Loading…
Reference in New Issue