diff --git a/env.example b/env.example
index db590761..56bc243e 100644
--- a/env.example
+++ b/env.example
@@ -127,8 +127,10 @@ MAX_PARALLEL_INSERT=2
 ### LLM Configuration ###
 ### LLM_BINDING type: openai, ollama, lollms, azure_openai, aws_bedrock
 ###########################################################
-### LLM temperature setting for all llm binding (openai, azure_openai, ollama)
+### LLM temperature and timeout setting for all llm binding (openai, azure_openai, ollama)
 # TEMPERATURE=1.0
+### LLM request timeout setting for all llm (set to TIMEOUT if not specified)
+# LLM_TIMEOUT=150
 ### Some models like o1-mini require temperature to be set to 1, some LLM can fall into output loops with low temperature
 
 LLM_BINDING=openai
diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py
index c3384181..e84686cb 100644
--- a/lightrag/api/lightrag_server.py
+++ b/lightrag/api/lightrag_server.py
@@ -254,6 +254,8 @@ def create_app(args):
     if args.embedding_binding == "jina":
         from lightrag.llm.jina import jina_embed
 
+    llm_timeout = get_env_value("LLM_TIMEOUT", args.timeout, int)
+
     async def openai_alike_model_complete(
         prompt,
         system_prompt=None,
@@ -267,12 +269,10 @@ def create_app(args):
         if history_messages is None:
             history_messages = []
 
-        # Use OpenAI LLM options if available, otherwise fallback to global temperature
-        if args.llm_binding == "openai":
-            openai_options = OpenAILLMOptions.options_dict(args)
-            kwargs.update(openai_options)
-        else:
-            kwargs["temperature"] = args.temperature
+        # Use OpenAI LLM options if available
+        openai_options = OpenAILLMOptions.options_dict(args)
+        kwargs["timeout"] = llm_timeout
+        kwargs.update(openai_options)
 
         return await openai_complete_if_cache(
             args.llm_model,
@@ -297,12 +297,10 @@ def create_app(args):
         if history_messages is None:
             history_messages = []
 
-        # Use OpenAI LLM options if available, otherwise fallback to global temperature
-        if args.llm_binding == "azure_openai":
-            openai_options = OpenAILLMOptions.options_dict(args)
-            kwargs.update(openai_options)
-        else:
-            kwargs["temperature"] = args.temperature
+        # Use OpenAI LLM options
+        openai_options = OpenAILLMOptions.options_dict(args)
+        kwargs["timeout"] = llm_timeout
+        kwargs.update(openai_options)
 
         return await azure_openai_complete_if_cache(
             args.llm_model,
@@ -451,7 +449,7 @@ def create_app(args):
             llm_model_kwargs=(
                 {
                     "host": args.llm_binding_host,
-                    "timeout": args.timeout,
+                    "timeout": llm_timeout,
                     "options": OllamaLLMOptions.options_dict(args),
                     "api_key": args.llm_binding_api_key,
                 }
@@ -482,7 +480,7 @@ def create_app(args):
             chunk_token_size=int(args.chunk_size),
             chunk_overlap_token_size=int(args.chunk_overlap_size),
             llm_model_kwargs={
-                "timeout": args.timeout,
+                "timeout": llm_timeout,
             },
             llm_model_name=args.llm_model,
             llm_model_max_async=args.max_async,
diff --git a/lightrag/llm/anthropic.py b/lightrag/llm/anthropic.py
index 7878c8f0..b7a7dfaa 100644
--- a/lightrag/llm/anthropic.py
+++ b/lightrag/llm/anthropic.py
@@ -77,14 +77,18 @@ async def anthropic_complete_if_cache(
     if not VERBOSE_DEBUG and logger.level == logging.DEBUG:
         logging.getLogger("anthropic").setLevel(logging.INFO)
 
+    kwargs.pop("hashing_kv", None)
+    kwargs.pop("keyword_extraction", None)
+    timeout = kwargs.pop("timeout", None)
+
     anthropic_async_client = (
-        AsyncAnthropic(default_headers=default_headers, api_key=api_key)
+        AsyncAnthropic(default_headers=default_headers, api_key=api_key, timeout=timeout)
         if base_url is None
         else AsyncAnthropic(
-            base_url=base_url, default_headers=default_headers, api_key=api_key
+            base_url=base_url, default_headers=default_headers, api_key=api_key, timeout=timeout
         )
     )
-    kwargs.pop("hashing_kv", None)
+
     messages: list[dict[str, Any]] = []
     if system_prompt:
         messages.append({"role": "system", "content": system_prompt})
diff --git a/lightrag/llm/azure_openai.py b/lightrag/llm/azure_openai.py
index 60d2c18e..adec391a 100644
--- a/lightrag/llm/azure_openai.py
+++ b/lightrag/llm/azure_openai.py
@@ -59,13 +59,17 @@ async def azure_openai_complete_if_cache(
         or os.getenv("OPENAI_API_VERSION")
     )
 
+    kwargs.pop("hashing_kv", None)
+    kwargs.pop("keyword_extraction", None)
+    timeout = kwargs.pop("timeout", None)
+
     openai_async_client = AsyncAzureOpenAI(
         azure_endpoint=base_url,
         azure_deployment=deployment,
         api_key=api_key,
         api_version=api_version,
+        timeout=timeout,
     )
-    kwargs.pop("hashing_kv", None)
 
     messages = []
     if system_prompt:
         messages.append({"role": "system", "content": system_prompt})
diff --git a/lightrag/llm/ollama.py b/lightrag/llm/ollama.py
index 1ca5504e..6423fa90 100644
--- a/lightrag/llm/ollama.py
+++ b/lightrag/llm/ollama.py
@@ -51,6 +51,8 @@ async def _ollama_model_if_cache(
     # kwargs.pop("response_format", None)  # allow json
     host = kwargs.pop("host", None)
     timeout = kwargs.pop("timeout", None)
+    if timeout == 0:
+        timeout = None
     kwargs.pop("hashing_kv", None)
     api_key = kwargs.pop("api_key", None)
     headers = {
diff --git a/lightrag/llm/openai.py b/lightrag/llm/openai.py
index 910d1812..f920e392 100644
--- a/lightrag/llm/openai.py
+++ b/lightrag/llm/openai.py
@@ -149,17 +149,18 @@ async def openai_complete_if_cache(
     if not VERBOSE_DEBUG and logger.level == logging.DEBUG:
         logging.getLogger("openai").setLevel(logging.INFO)
 
+    # Remove special kwargs that shouldn't be passed to OpenAI
+    kwargs.pop("hashing_kv", None)
+    kwargs.pop("keyword_extraction", None)
+
     # Extract client configuration options
     client_configs = kwargs.pop("openai_client_configs", {})
 
     # Create the OpenAI client
     openai_async_client = create_openai_async_client(
-        api_key=api_key, base_url=base_url, client_configs=client_configs
+        api_key=api_key, base_url=base_url, client_configs=client_configs,
     )
 
-    # Remove special kwargs that shouldn't be passed to OpenAI
-    kwargs.pop("hashing_kv", None)
-    kwargs.pop("keyword_extraction", None)
-
     # Prepare messages
     messages: list[dict[str, Any]] = []
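
For context, the timeout resolution this patch introduces can be summarized with a minimal sketch. It assumes get_env_value behaves like a typed os.environ lookup with a fallback default; the helper below is a simplified stand-in for illustration, not the repo's implementation, and the 150-second default is illustrative (taken from the env.example comment):

    import os

    def get_env_value(key, default, cast=str):
        # Simplified stand-in: read an env var, cast it,
        # and fall back to the given default when unset or invalid.
        raw = os.environ.get(key)
        if raw is None:
            return default
        try:
            return cast(raw)
        except (TypeError, ValueError):
            return default

    # Resolution order per this patch:
    #   LLM_TIMEOUT (env) -> args.timeout (the existing global TIMEOUT)
    global_timeout = get_env_value("TIMEOUT", 150, int)   # illustrative default
    llm_timeout = get_env_value("LLM_TIMEOUT", global_timeout, int)

    # Ollama additionally treats 0 as "no timeout", matching the new
    # guard added in lightrag/llm/ollama.py:
    if llm_timeout == 0:
        llm_timeout = None

Each binding then pops the timeout kwarg and hands it to its underlying client (AsyncAnthropic, AsyncAzureOpenAI, or the Ollama client), so a single environment variable can now govern LLM request timeouts across bindings instead of reusing the global TIMEOUT unconditionally.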