feat(llm): add VLLM_ENGINE env flag for provider-specific extra_body format

This commit is contained in:
Woody 2026-04-28 13:30:27 +08:00
parent aa5f716578
commit 711be3dfde
2 changed files with 10 additions and 10 deletions

View File

@ -13,6 +13,7 @@ class Settings(BaseSettings):
llm_api_key: str = ""
llm_model_name: str = "qwen/qwen3.5-35b-a3b"
llm_enable_thinking: bool = False
# Selects the extra_body format LLMClient._build_extra_body sends when
# thinking is disabled: True -> vLLM-style
# {"chat_template_kwargs": {"enable_thinking": False}},
# False -> OpenRouter-style {"reasoning": {"enabled": False}}.
vllm_engine: bool = False
# Embeddings
embedding_model: str = "qwen/qwen3-embedding-4b"

View File

@ -83,20 +83,19 @@ class LLMClient:
def _build_extra_body(self) -> dict:
    """Return the provider-specific ``extra_body`` payload for a request.

    With thinking enabled, an empty dict is returned so that no extra
    parameters are sent and the model uses its default thinking mode.
    With thinking disabled, the opt-out is expressed in the format chosen
    by ``self.settings.vllm_engine``:

        vLLM:       {"chat_template_kwargs": {"enable_thinking": False}}
        OpenRouter: {"reasoning": {"enabled": False}}

    Returns:
        A new dict suitable for passing as ``extra_body``.
    """
    payload: dict = {}
    if not self.enable_thinking:
        # NOTE(review): only these two formats are produced. An earlier
        # comment here said Alibaba Cloud Model Studio expects a top-level
        # enable_thinking key instead -- confirm before targeting it.
        if self.settings.vllm_engine:
            payload["chat_template_kwargs"] = {"enable_thinking": False}
        else:
            payload["reasoning"] = {"enabled": False}
    return payload
async def close(self):