feat(llm): add VLLM_ENGINE env flag for provider-specific extra_body format
This commit is contained in:
parent
aa5f716578
commit
711be3dfde
|
|
@@ -13,6 +13,7 @@ class Settings(BaseSettings):
|
||||||
llm_api_key: str = ""
|
llm_api_key: str = ""
|
||||||
llm_model_name: str = "qwen/qwen3.5-35b-a3b"
|
llm_model_name: str = "qwen/qwen3.5-35b-a3b"
|
||||||
llm_enable_thinking: bool = False
|
llm_enable_thinking: bool = False
|
||||||
|
vllm_engine: bool = False
|
||||||
|
|
||||||
# Embeddings
|
# Embeddings
|
||||||
embedding_model: str = "qwen/qwen3-embedding-4b"
|
embedding_model: str = "qwen/qwen3-embedding-4b"
|
||||||
|
|
|
||||||
|
|
@@ -83,20 +83,19 @@ class LLMClient:
|
||||||
def _build_extra_body(self) -> dict:
|
def _build_extra_body(self) -> dict:
|
||||||
"""Build extra_body for provider-specific parameters.
|
"""Build extra_body for provider-specific parameters.
|
||||||
|
|
||||||
For Qwen3.5 models, disables thinking content via chat_template_kwargs
|
When thinking is enabled, no extra params are passed
|
||||||
(vLLM/SGLang convention). When thinking is enabled, no extra params
|
and the model uses its default thinking mode.
|
||||||
are passed and the model uses its default thinking mode.
|
|
||||||
|
vLLM: {"chat_template_kwargs": {"enable_thinking": False}}
|
||||||
|
OpenRouter: {"reasoning": {"enabled": False}}
|
||||||
"""
|
"""
|
||||||
if self.enable_thinking:
|
if self.enable_thinking:
|
||||||
return {}
|
return {}
|
||||||
|
|
||||||
# Non-thinking mode for Qwen3.5
|
if self.settings.vllm_engine:
|
||||||
# Uses chat_template_kwargs for vLLM/SGLang compatibility.
|
return {
|
||||||
# For Alibaba Cloud Model Studio, use top-level enable_thinking instead.
|
"chat_template_kwargs": {"enable_thinking": False},
|
||||||
# return {
|
}
|
||||||
# "chat_template_kwargs": {"enable_thinking": False},
|
|
||||||
# "top_k": 20,
|
|
||||||
# }
|
|
||||||
return {"reasoning": {"enabled": False}}
|
return {"reasoning": {"enabled": False}}
|
||||||
|
|
||||||
async def close(self):
|
async def close(self):
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue