feat(backend): add embedding client and update LLM client
Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent) Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
parent
4a22b906e4
commit
38f4c70762
|
|
@ -0,0 +1,52 @@
|
||||||
|
import logging
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
import httpx
|
||||||
|
|
||||||
|
from app.core.config import Settings
|
||||||
|
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
class EmbeddingClient:
|
||||||
|
"""Async embedding client for OpenRouter-compatible embeddings API."""
|
||||||
|
|
||||||
|
def __init__(self, settings: Settings):
|
||||||
|
self.base_url = settings.embedding_base_url.rstrip("/")
|
||||||
|
self.api_key = settings.embedding_api_key or settings.llm_api_key
|
||||||
|
self.model = settings.embedding_model
|
||||||
|
# Async HTTP client for connection pooling
|
||||||
|
self._client: httpx.AsyncClient | None = httpx.AsyncClient(
|
||||||
|
base_url=self.base_url,
|
||||||
|
timeout=settings.llm_timeout,
|
||||||
|
headers={"Authorization": f"Bearer {self.api_key}"},
|
||||||
|
)
|
||||||
|
|
||||||
|
async def embed(self, texts: List[str]) -> List[List[float]]:
|
||||||
|
if not texts:
|
||||||
|
return []
|
||||||
|
payload = {"model": self.model, "input": texts}
|
||||||
|
try:
|
||||||
|
resp = await self._client.post("/embeddings", json=payload)
|
||||||
|
resp.raise_for_status()
|
||||||
|
data = resp.json()
|
||||||
|
# Common OpenAI-like response shapes
|
||||||
|
if isinstance(data, dict):
|
||||||
|
# OpenRouter/OpenAI style: {"data": [{"embedding": []}, ...]}
|
||||||
|
if "data" in data and isinstance(data["data"], list):
|
||||||
|
return [item.get("embedding", []) for item in data["data"]]
|
||||||
|
# Alternative: {"embeddings": [[...], [...]]}
|
||||||
|
if "embeddings" in data and isinstance(data["embeddings"], list):
|
||||||
|
return data["embeddings"]
|
||||||
|
# Fallback: try to extract a flat list
|
||||||
|
if isinstance(data, list):
|
||||||
|
return data # type: ignore[return-value]
|
||||||
|
except Exception as exc: # pragma: no cover - network/runtime issues
|
||||||
|
logger.error("Embedding API call failed: %s", exc)
|
||||||
|
raise
|
||||||
|
return []
|
||||||
|
|
||||||
|
async def close(self):
|
||||||
|
if self._client:
|
||||||
|
await self._client.aclose()
|
||||||
|
|
@ -1,28 +1,49 @@
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
|
|
||||||
from app.core.config import Settings
|
from app.core.config import Settings
|
||||||
|
|
||||||
|
|
||||||
|
class LLMClientError(Exception):
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
class LLMClient:
|
class LLMClient:
|
||||||
|
"""Asynchronous LLM HTTP client with connection pooling."""
|
||||||
|
|
||||||
def __init__(self, settings: Settings):
|
def __init__(self, settings: Settings):
|
||||||
|
self.settings = settings
|
||||||
self.base_url = settings.llm_base_url.rstrip("/")
|
self.base_url = settings.llm_base_url.rstrip("/")
|
||||||
self.api_key = settings.llm_api_key
|
self.api_key = settings.llm_api_key
|
||||||
self.model = settings.llm_model_name
|
self.model = settings.llm_model_name
|
||||||
|
self.logger = logging.getLogger(__name__)
|
||||||
|
# Use a single shared AsyncClient for all requests
|
||||||
|
self._client = httpx.AsyncClient(
|
||||||
|
base_url=self.base_url,
|
||||||
|
timeout=settings.llm_timeout,
|
||||||
|
headers={"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"},
|
||||||
|
)
|
||||||
|
|
||||||
def complete(self, prompt: str, temperature: float = 0.7) -> str:
|
async def complete(self, prompt: str, temperature: float = 0.7) -> str:
|
||||||
response = httpx.post(
|
try:
|
||||||
f"{self.base_url}/chat/completions",
|
resp = await self._client.post(
|
||||||
headers={
|
"/chat/completions",
|
||||||
"Authorization": f"Bearer {self.api_key}",
|
|
||||||
"Content-Type": "application/json",
|
|
||||||
},
|
|
||||||
json={
|
json={
|
||||||
"model": self.model,
|
"model": self.model,
|
||||||
"messages": [{"role": "user", "content": prompt}],
|
"messages": [{"role": "user", "content": prompt}],
|
||||||
"temperature": temperature,
|
"temperature": temperature,
|
||||||
},
|
},
|
||||||
timeout=60.0,
|
|
||||||
)
|
)
|
||||||
response.raise_for_status()
|
resp.raise_for_status()
|
||||||
data = response.json()
|
data = resp.json()
|
||||||
return data["choices"][0]["message"]["content"]
|
return data["choices"][0]["message"]["content"]
|
||||||
|
except (httpx.TimeoutException, httpx.HTTPStatusError, httpx.RequestError) as exc:
|
||||||
|
self.logger.error("LLM API error: %s", exc)
|
||||||
|
raise LLMClientError from exc
|
||||||
|
|
||||||
|
async def close(self):
|
||||||
|
if self._client:
|
||||||
|
await self._client.aclose()
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue