fix: vLLM chat_template_kwargs breaks LangChain structured output
vLLM's chat_template_kwargs leaked through the model_kwargs built in _get_langchain_model into the AsyncCompletions.parse() call made by LangChain's with_structured_output(), which rejects the unknown kwarg, causing structured decomposition to fail on vLLM backends. Skip vLLM-specific params when building the LangChain model — only provider-agnostic params (e.g. OpenAI's reasoning) pass through.
This commit is contained in:
parent
90269608bc
commit
cbb958d75d
|
|
@ -110,10 +110,13 @@ class LLMClient:
|
|||
os.environ.setdefault("OPENAI_API_KEY", self.settings.llm_api_key)
|
||||
os.environ.setdefault("OPENAI_BASE_URL", self.settings.llm_base_url)
|
||||
|
||||
# Pass thinking/reasoning disable params via model_kwargs.
|
||||
# LangChain's ChatOpenAI forwards model_kwargs as top-level
|
||||
# request parameters, which is equivalent to OpenAI SDK's extra_body.
|
||||
model_kwargs = self._build_extra_body() or None
|
||||
# vLLM's chat_template_kwargs is incompatible with LangChain's
|
||||
# with_structured_output() — it leaks into AsyncCompletions.parse()
|
||||
# which rejects the unknown kwarg. Only provider-agnostic params
|
||||
# (e.g. OpenAI's reasoning) are safe to pass via model_kwargs.
|
||||
model_kwargs: dict[str, Any] | None = None
|
||||
if not self.settings.vllm_engine and not self.enable_thinking:
|
||||
model_kwargs = {"reasoning": {"enabled": False}}
|
||||
|
||||
self._langchain_model = init_chat_model(
|
||||
model=self.model,
|
||||
|
|
|
|||
|
|
@ -1,18 +1,18 @@
|
|||
fastapi==0.109.0
|
||||
uvicorn[standard]==0.27.0
|
||||
pydantic==2.5.3
|
||||
pydantic-settings==2.1.0
|
||||
pydantic>=2.7.4,<3.0.0
|
||||
pydantic-settings>=2.1.0
|
||||
chromadb==0.4.22
|
||||
numpy<2.0
|
||||
python-docx==1.1.0
|
||||
pypdf==4.0.2
|
||||
python-dotenv==1.0.0
|
||||
httpx==0.26.0
|
||||
openai==1.12.0
|
||||
python-docx>=1.1.0
|
||||
pypdf>=4.0.2
|
||||
python-dotenv>=1.0.0
|
||||
httpx>=0.26.0
|
||||
openai>=2.26.0,<3.0.0
|
||||
pytest==7.4.4
|
||||
pytest-asyncio==0.23.4
|
||||
tiktoken==0.5.2
|
||||
python-multipart==0.0.6
|
||||
reportlab==4.2.5
|
||||
langchain==1.2.12
|
||||
langchain-openai==1.1.11
|
||||
tiktoken>=0.7.0,<1.0.0
|
||||
python-multipart>=0.0.6
|
||||
reportlab>=4.2.5
|
||||
langchain>=1.2.12,<1.3.0
|
||||
langchain-openai>=1.1.11,<1.2.0
|
||||
|
|
|
|||
Loading…
Reference in New Issue