diff --git a/backend/app/core/config.py b/backend/app/core/config.py
index 34e508c..6477f04 100644
--- a/backend/app/core/config.py
+++ b/backend/app/core/config.py
@@ -13,6 +13,7 @@ class Settings(BaseSettings):
     llm_api_key: str = ""
     llm_model_name: str = "qwen/qwen3.5-35b-a3b"
     llm_enable_thinking: bool = False
+    vllm_engine: bool = False
 
     # Embeddings
     embedding_model: str = "qwen/qwen3-embedding-4b"
diff --git a/backend/app/services/llm_client.py b/backend/app/services/llm_client.py
index 6069367..0ea3263 100644
--- a/backend/app/services/llm_client.py
+++ b/backend/app/services/llm_client.py
@@ -83,20 +83,19 @@ class LLMClient:
     def _build_extra_body(self) -> dict:
         """Build extra_body for provider-specific parameters.
 
-        For Qwen3.5 models, disables thinking content via chat_template_kwargs
-        (vLLM/SGLang convention). When thinking is enabled, no extra params
-        are passed and the model uses its default thinking mode.
+        When thinking is enabled, no extra params are passed and the model
+        uses its default thinking mode. Otherwise thinking is disabled with:
+
+        vLLM (vllm_engine set): {"chat_template_kwargs": {"enable_thinking": False}}
+        OpenRouter (default):   {"reasoning": {"enabled": False}}
         """
         if self.enable_thinking:
             return {}
 
-        # Non-thinking mode for Qwen3.5
-        # Uses chat_template_kwargs for vLLM/SGLang compatibility.
-        # For Alibaba Cloud Model Studio, use top-level enable_thinking instead.
-        # return {
-        #     "chat_template_kwargs": {"enable_thinking": False},
-        #     "top_k": 20,
-        # }
+        if self.settings.vllm_engine:
+            return {
+                "chat_template_kwargs": {"enable_thinking": False},
+            }
         return {"reasoning": {"enabled": False}}
 
     async def close(self):