fix(backend): wrap embedding function for ChromaDB 0.4.22 compatibility

- Add _EmbeddingFunctionWrapper class with __call__(self, input) signature

- Use ThreadPoolExecutor to run async embed in isolated thread with fresh event loop

- Fixes the "asyncio.run() cannot be called from a running event loop" error

Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent)

Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
Woody 2026-04-23 13:25:08 +08:00
parent 4b633d86f7
commit c9f330d57e
1 changed files with 35 additions and 1 deletions

View File

@ -1,10 +1,42 @@
from pathlib import Path from pathlib import Path
import chromadb import chromadb
from typing import Callable, Optional
from app.core.config import get_settings from app.core.config import get_settings
class _EmbeddingFunctionWrapper:
    """Synchronous callable that adapts the async ``EmbeddingClient`` for ChromaDB.

    Each call runs ``EmbeddingClient.embed`` to completion on a dedicated
    worker thread that owns its own event loop, so the wrapper can be invoked
    from code that is already running inside an event loop.
    """

    def __init__(self, settings):
        """Store the settings object that is later passed to ``EmbeddingClient``."""
        self.settings = settings

    # NOTE: the parameter has to stay named ``input`` even though it shadows
    # the builtin; per the accompanying commit message, ChromaDB 0.4.22
    # expects the ``__call__(self, input)`` signature.
    def __call__(self, input):
        """Embed *input* and return the result of ``EmbeddingClient.embed``.

        Blocks the calling thread until the embedding has finished. Any
        exception raised while embedding is re-raised in the caller.
        """
        # Imported lazily so that importing this module does not pull in the
        # embedding client.
        from app.services.embedding_client import EmbeddingClient
        import asyncio
        from concurrent.futures import ThreadPoolExecutor

        def _embed_in_worker(texts):
            # Build the client on the worker thread, in case it binds
            # resources to the event loop that is current when it is created
            # or first used.
            client = EmbeddingClient(self.settings)

            async def _embed():
                # Wrapping in a coroutine lets asyncio.run() accept whatever
                # awaitable ``embed`` returns.
                return await client.embed(texts)

            # asyncio.run() creates a fresh loop for this thread and, unlike
            # a bare loop.close(), also cancels leftover tasks and shuts down
            # async generators and the default executor before closing it.
            return asyncio.run(_embed())

        # A single-use worker thread has no running loop, so asyncio.run()
        # is always legal there; .result() propagates worker exceptions.
        with ThreadPoolExecutor(max_workers=1) as executor:
            return executor.submit(_embed_in_worker, input).result()
def get_embedding_function_settings(settings):
    """Return a synchronous embedding-function wrapper suitable for ChromaDB.

    Args:
        settings: Settings object handed to ``_EmbeddingFunctionWrapper``.

    Returns:
        An ``_EmbeddingFunctionWrapper`` instance, or ``None`` if building
        the wrapper raised.
    """
    try:
        return _EmbeddingFunctionWrapper(settings)
    except Exception:
        # Best-effort by design: callers receive None instead of an
        # exception. Record the failure, because a None result makes
        # get_or_create_collection leave out the embedding_function argument
        # and that would otherwise happen without any trace.
        import logging

        logging.getLogger(__name__).exception(
            "Failed to build the ChromaDB embedding function wrapper"
        )
        return None
def get_chroma_client() -> chromadb.Client: def get_chroma_client() -> chromadb.Client:
settings = get_settings() settings = get_settings()
persist_dir = Path(settings.chroma_db_path) persist_dir = Path(settings.chroma_db_path)
@ -12,5 +44,7 @@ def get_chroma_client() -> chromadb.Client:
return chromadb.PersistentClient(path=str(persist_dir)) return chromadb.PersistentClient(path=str(persist_dir))
def get_or_create_collection(client: chromadb.Client, name: str, embedding_function: Optional[Callable] = None):
    """Fetch the collection called *name* from *client*, creating it if needed.

    Args:
        client: Object exposing ``get_or_create_collection``.
        name: Name of the collection.
        embedding_function: Optional callable forwarded to the client. When
            it is ``None`` the keyword is omitted entirely rather than being
            passed as ``None``.

    Returns:
        Whatever ``client.get_or_create_collection`` returns.
    """
    call_kwargs = {"name": name}
    if embedding_function is not None:
        call_kwargs["embedding_function"] = embedding_function
    return client.get_or_create_collection(**call_kwargs)