diff --git a/env.example b/env.example
index 56bc243e..c0fc7567 100644
--- a/env.example
+++ b/env.example
@@ -127,9 +127,7 @@ MAX_PARALLEL_INSERT=2
 ### LLM Configuration
 ### LLM_BINDING type: openai, ollama, lollms, azure_openai, aws_bedrock
 ###########################################################
-### LLM temperature and timeout setting for all llm binding (openai, azure_openai, ollama)
-# TEMPERATURE=1.0
-### LLM request timeout setting for all llm (set to TIMEOUT if not specified)
+### LLM request timeout setting for all llm (set to TIMEOUT if not specified, 0 means no timeout for Ollama)
 # LLM_TIMEOUT=150
 
 ### Some models like o1-mini require temperature to be set to 1, some LLM can fall into output loops with low temperature
@@ -151,6 +149,7 @@ LLM_BINDING_API_KEY=your_api_key
 ### OpenAI Specific Parameters
 ### Apply frequency penalty to prevent the LLM from generating repetitive or looping outputs
 # OPENAI_LLM_FREQUENCY_PENALTY=1.1
+# OPENAI_LLM_TEMPERATURE=1.0
 ### use the following command to see all support options for openai and azure_openai
 ### lightrag-server --llm-binding openai --help
 
@@ -164,6 +163,9 @@ OLLAMA_LLM_NUM_CTX=32768
 ### use the following command to see all support options for Ollama LLM
 ### lightrag-server --llm-binding ollama --help
 
+### Bedrock Specific Parameters
+# BEDROCK_LLM_TEMPERATURE=1.0
+
 ####################################################################################
 ### Embedding Configuration (Should not be changed after the first file processed)
 ####################################################################################
diff --git a/lightrag/api/config.py b/lightrag/api/config.py
index 01d0dd75..756fd7d2 100644
--- a/lightrag/api/config.py
+++ b/lightrag/api/config.py
@@ -35,7 +35,6 @@ from lightrag.constants import (
     DEFAULT_EMBEDDING_BATCH_NUM,
     DEFAULT_OLLAMA_MODEL_NAME,
     DEFAULT_OLLAMA_MODEL_TAG,
-    DEFAULT_TEMPERATURE,
 )
 
 # use the .env that is inside the current folder
@@ -264,14 +263,6 @@ def parse_args() -> argparse.Namespace:
     elif os.environ.get("LLM_BINDING") in ["openai", "azure_openai"]:
         OpenAILLMOptions.add_args(parser)
 
-    # Add global temperature command line argument
-    parser.add_argument(
-        "--temperature",
-        type=float,
-        default=get_env_value("TEMPERATURE", DEFAULT_TEMPERATURE, float),
-        help="Global temperature setting for LLM (default: from env TEMPERATURE or 0.1)",
-    )
-
     args = parser.parse_args()
 
     # convert relative path to absolute path
@@ -330,32 +321,6 @@ def parse_args() -> argparse.Namespace:
     )
     args.enable_llm_cache = get_env_value("ENABLE_LLM_CACHE", True, bool)
 
-    # Handle Ollama LLM temperature with priority cascade when llm-binding is ollama
-    if args.llm_binding == "ollama":
-        # Priority order (highest to lowest):
-        # 1. --ollama-llm-temperature command argument
-        # 2. OLLAMA_LLM_TEMPERATURE environment variable
-        # 3. --temperature command argument
-        # 4. TEMPERATURE environment variable
-
-        # Check if --ollama-llm-temperature was explicitly provided in command line
-        if "--ollama-llm-temperature" not in sys.argv:
-            # Use args.temperature which handles --temperature command arg and TEMPERATURE env var priority
-            args.ollama_llm_temperature = args.temperature
-
-    # Handle OpenAI LLM temperature with priority cascade when llm-binding is openai or azure_openai
-    if args.llm_binding in ["openai", "azure_openai"]:
-        # Priority order (highest to lowest):
-        # 1. --openai-llm-temperature command argument
-        # 2. OPENAI_LLM_TEMPERATURE environment variable
-        # 3. --temperature command argument
-        # 4. TEMPERATURE environment variable
-
-        # Check if --openai-llm-temperature was explicitly provided in command line
-        if "--openai-llm-temperature" not in sys.argv:
-            # Use args.temperature which handles --temperature command arg and TEMPERATURE env var priority
-            args.openai_llm_temperature = args.temperature
-
     # Select Document loading tool (DOCLING, DEFAULT)
     args.document_loading_engine = get_env_value("DOCUMENT_LOADING_ENGINE", "DEFAULT")
 
diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py
index e84686cb..708fedd2 100644
--- a/lightrag/api/lightrag_server.py
+++ b/lightrag/api/lightrag_server.py
@@ -327,7 +327,7 @@ def create_app(args):
             history_messages = []
 
         # Use global temperature for Bedrock
-        kwargs["temperature"] = args.temperature
+        kwargs["temperature"] = get_env_value("BEDROCK_LLM_TEMPERATURE", 1.0, float)
 
         return await bedrock_complete_if_cache(
             args.llm_model,
@@ -479,9 +479,6 @@ def create_app(args):
             llm_model_func=azure_openai_model_complete,
             chunk_token_size=int(args.chunk_size),
             chunk_overlap_token_size=int(args.chunk_overlap_size),
-            llm_model_kwargs={
-                "timeout": llm_timeout,
-            },
             llm_model_name=args.llm_model,
             llm_model_max_async=args.max_async,
             summary_max_tokens=args.max_tokens,
diff --git a/lightrag/api/utils_api.py b/lightrag/api/utils_api.py
index 90a1eb96..fc05716c 100644
--- a/lightrag/api/utils_api.py
+++ b/lightrag/api/utils_api.py
@@ -201,6 +201,8 @@ def display_splash_screen(args: argparse.Namespace) -> None:
     ASCIIColors.yellow(f"{args.port}")
     ASCIIColors.white(" ├─ Workers: ", end="")
     ASCIIColors.yellow(f"{args.workers}")
+    ASCIIColors.white(" ├─ Timeout: ", end="")
+    ASCIIColors.yellow(f"{args.timeout}")
     ASCIIColors.white(" ├─ CORS Origins: ", end="")
     ASCIIColors.yellow(f"{args.cors_origins}")
     ASCIIColors.white(" ├─ SSL Enabled: ", end="")
@@ -238,14 +240,10 @@ def display_splash_screen(args: argparse.Namespace) -> None:
     ASCIIColors.yellow(f"{args.llm_binding_host}")
     ASCIIColors.white(" ├─ Model: ", end="")
     ASCIIColors.yellow(f"{args.llm_model}")
-    ASCIIColors.white(" ├─ Temperature: ", end="")
-    ASCIIColors.yellow(f"{args.temperature}")
     ASCIIColors.white(" ├─ Max Async for LLM: ", end="")
     ASCIIColors.yellow(f"{args.max_async}")
     ASCIIColors.white(" ├─ Max Tokens: ", end="")
     ASCIIColors.yellow(f"{args.max_tokens}")
-    ASCIIColors.white(" ├─ Timeout: ", end="")
-    ASCIIColors.yellow(f"{args.timeout if args.timeout else 'None (infinite)'}")
     ASCIIColors.white(" ├─ LLM Cache Enabled: ", end="")
     ASCIIColors.yellow(f"{args.enable_llm_cache}")
     ASCIIColors.white(" └─ LLM Cache for Extraction Enabled: ", end="")
diff --git a/lightrag/llm/Readme.md b/lightrag/llm/Readme.md
index c907fd4d..fc00d071 100644
--- a/lightrag/llm/Readme.md
+++ b/lightrag/llm/Readme.md
@@ -36,7 +36,6 @@ async def llm_model_func(prompt, system_prompt=None, history_messages=[], **kwar
         llm_instance = OpenAI(
             model="gpt-4",
             api_key="your-openai-key",
-            temperature=0.7,
         )
         kwargs['llm_instance'] = llm_instance
 
@@ -91,7 +90,6 @@ async def llm_model_func(prompt, system_prompt=None, history_messages=[], **kwar
             model=f"openai/{settings.LLM_MODEL}", # Format: "provider/model_name"
             api_base=settings.LITELLM_URL,
             api_key=settings.LITELLM_KEY,
-            temperature=0.7,
         )
         kwargs['llm_instance'] = llm_instance
 
diff --git a/lightrag/llm/anthropic.py b/lightrag/llm/anthropic.py
index b7a7dfaa..98a997d5 100644
--- a/lightrag/llm/anthropic.py
+++ b/lightrag/llm/anthropic.py
@@ -82,10 +82,15 @@ async def anthropic_complete_if_cache(
 
     timeout = kwargs.pop("timeout", None)
     anthropic_async_client = (
-        AsyncAnthropic(default_headers=default_headers, api_key=api_key, timeout=timeout)
+        AsyncAnthropic(
+            default_headers=default_headers, api_key=api_key, timeout=timeout
+        )
         if base_url is None
         else AsyncAnthropic(
-            base_url=base_url, default_headers=default_headers, api_key=api_key, timeout=timeout
+            base_url=base_url,
+            default_headers=default_headers,
+            api_key=api_key,
+            timeout=timeout,
         )
     )
 
diff --git a/lightrag/llm/azure_openai.py b/lightrag/llm/azure_openai.py
index adec391a..0ede0824 100644
--- a/lightrag/llm/azure_openai.py
+++ b/lightrag/llm/azure_openai.py
@@ -62,7 +62,7 @@ async def azure_openai_complete_if_cache(
     kwargs.pop("hashing_kv", None)
     kwargs.pop("keyword_extraction", None)
     timeout = kwargs.pop("timeout", None)
-    
+
     openai_async_client = AsyncAzureOpenAI(
         azure_endpoint=base_url,
         azure_deployment=deployment,
diff --git a/lightrag/llm/lollms.py b/lightrag/llm/lollms.py
index 357b65bf..39b64ce3 100644
--- a/lightrag/llm/lollms.py
+++ b/lightrag/llm/lollms.py
@@ -59,7 +59,7 @@ async def lollms_model_if_cache(
         "personality": kwargs.get("personality", -1),
         "n_predict": kwargs.get("n_predict", None),
         "stream": stream,
-        "temperature": kwargs.get("temperature", 0.8),
+        "temperature": kwargs.get("temperature", 1.0),
         "top_k": kwargs.get("top_k", 50),
         "top_p": kwargs.get("top_p", 0.95),
         "repeat_penalty": kwargs.get("repeat_penalty", 0.8),
diff --git a/lightrag/llm/openai.py b/lightrag/llm/openai.py
index f920e392..3bd652f4 100644
--- a/lightrag/llm/openai.py
+++ b/lightrag/llm/openai.py
@@ -158,10 +158,11 @@ async def openai_complete_if_cache(
 
     # Create the OpenAI client
     openai_async_client = create_openai_async_client(
-        api_key=api_key, base_url=base_url, client_configs=client_configs,
+        api_key=api_key,
+        base_url=base_url,
+        client_configs=client_configs,
     )
-
 
     # Prepare messages
     messages: list[dict[str, Any]] = []
     if system_prompt:
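Reviewer note (not part of the patch): the sketch below only illustrates the per-binding temperature lookup this diff moves to, where the removed global `--temperature`/`TEMPERATURE` setting is replaced by binding-specific environment variables such as `BEDROCK_LLM_TEMPERATURE` and `OPENAI_LLM_TEMPERATURE` (both documented in `env.example` above). `read_float_env` is a hypothetical stand-in for LightRAG's `get_env_value` helper; the defaults of 1.0 mirror the example values in `env.example` and the `get_env_value("BEDROCK_LLM_TEMPERATURE", 1.0, float)` call in `lightrag_server.py`.

```python
# Illustrative sketch only; read_float_env is a stand-in for LightRAG's get_env_value().
import os


def read_float_env(name: str, default: float) -> float:
    """Read a float from the environment, falling back to the given default."""
    raw = os.environ.get(name)
    if raw is None or raw.strip() == "":
        return default
    try:
        return float(raw)
    except ValueError:
        return default


# Bedrock resolves its own temperature (default 1.0) instead of a global TEMPERATURE.
bedrock_temperature = read_float_env("BEDROCK_LLM_TEMPERATURE", 1.0)

# OpenAI / Azure OpenAI: OPENAI_LLM_TEMPERATURE as documented in env.example
# (the 1.0 default here is illustrative, taken from the commented example value).
openai_temperature = read_float_env("OPENAI_LLM_TEMPERATURE", 1.0)

print(f"bedrock={bedrock_temperature} openai={openai_temperature}")
```

For example, running the sketch with `BEDROCK_LLM_TEMPERATURE=0.7` set in the environment prints 0.7 for Bedrock while OpenAI keeps its default, matching the per-binding behavior introduced by this change.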