fix: vLLM chat_template_kwargs breaks LangChain structured output

vLLM's chat_template_kwargs leaked through _get_langchain_model's
model_kwargs into the AsyncCompletions.parse() call made by LangChain's
with_structured_output(), which rejects the unknown kwarg, so structured
decomposition failed on vLLM backends. Skip vLLM-specific params when
building the LangChain model — only provider-agnostic params (e.g. OpenAI's
reasoning, sent on non-vLLM backends when thinking is disabled) pass through.
This commit is contained in:
Woody 2026-04-29 16:07:44 +08:00
parent 90269608bc
commit cbb958d75d
2 changed files with 19 additions and 16 deletions

View File

@ -110,10 +110,13 @@ class LLMClient:
os.environ.setdefault("OPENAI_API_KEY", self.settings.llm_api_key) os.environ.setdefault("OPENAI_API_KEY", self.settings.llm_api_key)
os.environ.setdefault("OPENAI_BASE_URL", self.settings.llm_base_url) os.environ.setdefault("OPENAI_BASE_URL", self.settings.llm_base_url)
# Pass thinking/reasoning disable params via model_kwargs. # vLLM's chat_template_kwargs is incompatible with LangChain's
# LangChain's ChatOpenAI forwards model_kwargs as top-level # with_structured_output() — it leaks into AsyncCompletions.parse()
# request parameters, which is equivalent to OpenAI SDK's extra_body. # which rejects the unknown kwarg. Only provider-agnostic params
model_kwargs = self._build_extra_body() or None # (e.g. OpenAI's reasoning) are safe to pass via model_kwargs.
model_kwargs: dict[str, Any] | None = None
if not self.settings.vllm_engine and not self.enable_thinking:
model_kwargs = {"reasoning": {"enabled": False}}
self._langchain_model = init_chat_model( self._langchain_model = init_chat_model(
model=self.model, model=self.model,

View File

@ -1,18 +1,18 @@
fastapi==0.109.0 fastapi==0.109.0
uvicorn[standard]==0.27.0 uvicorn[standard]==0.27.0
pydantic==2.5.3 pydantic>=2.7.4,<3.0.0
pydantic-settings==2.1.0 pydantic-settings>=2.1.0
chromadb==0.4.22 chromadb==0.4.22
numpy<2.0 numpy<2.0
python-docx==1.1.0 python-docx>=1.1.0
pypdf==4.0.2 pypdf>=4.0.2
python-dotenv==1.0.0 python-dotenv>=1.0.0
httpx==0.26.0 httpx>=0.26.0
openai==1.12.0 openai>=2.26.0,<3.0.0
pytest==7.4.4 pytest==7.4.4
pytest-asyncio==0.23.4 pytest-asyncio==0.23.4
tiktoken==0.5.2 tiktoken>=0.7.0,<1.0.0
python-multipart==0.0.6 python-multipart>=0.0.6
reportlab==4.2.5 reportlab>=4.2.5
langchain==1.2.12 langchain>=1.2.12,<1.3.0
langchain-openai==1.1.11 langchain-openai>=1.1.11,<1.2.0