fix(backend): wrap embedding function for ChromaDB 0.4.22 compatibility
- Add _EmbeddingFunctionWrapper class with __call__(self, input) signature - Use ThreadPoolExecutor to run async embed in isolated thread with fresh event loop - Fixes asyncio.run() cannot be called from a running event loop Ultraworked with [Sisyphus](https://github.com/code-yeongyu/oh-my-openagent) Co-authored-by: Sisyphus <clio-agent@sisyphuslabs.ai>
This commit is contained in:
parent
4b633d86f7
commit
c9f330d57e
|
|
@ -1,10 +1,42 @@
|
|||
from pathlib import Path
|
||||
|
||||
import chromadb
|
||||
from typing import Callable, Optional
|
||||
|
||||
from app.core.config import get_settings
|
||||
|
||||
|
||||
class _EmbeddingFunctionWrapper:
|
||||
def __init__(self, settings):
|
||||
self.settings = settings
|
||||
|
||||
def __call__(self, input):
|
||||
from app.services.embedding_client import EmbeddingClient
|
||||
import asyncio
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
|
||||
def _run_in_thread(texts):
|
||||
client = EmbeddingClient(self.settings)
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
try:
|
||||
return loop.run_until_complete(client.embed(texts))
|
||||
finally:
|
||||
loop.close()
|
||||
|
||||
with ThreadPoolExecutor(max_workers=1) as executor:
|
||||
return executor.submit(_run_in_thread, input).result()
|
||||
|
||||
|
||||
def get_embedding_function_settings(settings):
|
||||
"""Return a synchronous wrapper suitable for embedding functions in ChromaDB."""
|
||||
# Lazy import to avoid import-time side effects in tests
|
||||
try:
|
||||
return _EmbeddingFunctionWrapper(settings)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def get_chroma_client() -> chromadb.Client:
|
||||
settings = get_settings()
|
||||
persist_dir = Path(settings.chroma_db_path)
|
||||
|
|
@ -12,5 +44,7 @@ def get_chroma_client() -> chromadb.Client:
|
|||
return chromadb.PersistentClient(path=str(persist_dir))
|
||||
|
||||
|
||||
def get_or_create_collection(client: chromadb.Client, name: str):
|
||||
def get_or_create_collection(client: chromadb.Client, name: str, embedding_function: Optional[Callable] = None):
|
||||
if embedding_function is not None:
|
||||
return client.get_or_create_collection(name=name, embedding_function=embedding_function)
|
||||
return client.get_or_create_collection(name=name)
|
||||
|
|
|
|||
Loading…
Reference in New Issue