diff --git a/backend/app/services/llm_client.py b/backend/app/services/llm_client.py
index 816293f..d8e38db 100644
--- a/backend/app/services/llm_client.py
+++ b/backend/app/services/llm_client.py
@@ -110,10 +110,13 @@ class LLMClient:
             os.environ.setdefault("OPENAI_API_KEY", self.settings.llm_api_key)
             os.environ.setdefault("OPENAI_BASE_URL", self.settings.llm_base_url)
 
-            # Pass thinking/reasoning disable params via model_kwargs.
-            # LangChain's ChatOpenAI forwards model_kwargs as top-level
-            # request parameters, which is equivalent to OpenAI SDK's extra_body.
-            model_kwargs = self._build_extra_body() or None
+            # vLLM's chat_template_kwargs is incompatible with LangChain's
+            # with_structured_output() — it leaks into AsyncCompletions.parse()
+            # which rejects the unknown kwarg. Only provider-agnostic params
+            # (e.g. OpenAI's reasoning) are safe to pass via model_kwargs.
+            model_kwargs: dict[str, Any] | None = None
+            if not self.settings.vllm_engine and not self.enable_thinking:
+                model_kwargs = {"reasoning": {"enabled": False}}
 
             self._langchain_model = init_chat_model(
                 model=self.model,
diff --git a/backend/requirements.txt b/backend/requirements.txt
index c82c984..327d08f 100644
--- a/backend/requirements.txt
+++ b/backend/requirements.txt
@@ -1,18 +1,18 @@
 fastapi==0.109.0
 uvicorn[standard]==0.27.0
-pydantic==2.5.3
-pydantic-settings==2.1.0
+pydantic>=2.7.4,<3.0.0
+pydantic-settings>=2.1.0
 chromadb==0.4.22
 numpy<2.0
-python-docx==1.1.0
-pypdf==4.0.2
-python-dotenv==1.0.0
-httpx==0.26.0
-openai==1.12.0
+python-docx>=1.1.0
+pypdf>=4.0.2
+python-dotenv>=1.0.0
+httpx>=0.26.0
+openai>=2.26.0,<3.0.0
 pytest==7.4.4
 pytest-asyncio==0.23.4
-tiktoken==0.5.2
-python-multipart==0.0.6
-reportlab==4.2.5
-langchain==1.2.12
-langchain-openai==1.1.11
+tiktoken>=0.7.0,<1.0.0
+python-multipart>=0.0.6
+reportlab>=4.2.5
+langchain>=1.2.12,<1.3.0
+langchain-openai>=1.1.11,<1.2.0