Add LLM_TIMEOUT configuration for all LLM providers

- Add LLM_TIMEOUT env variable
- Apply timeout to all LLM bindings
yangdx 2025-08-20 23:50:57 +08:00
parent 4c556d8aae
commit df7bcb1e3d
6 changed files with 34 additions and 23 deletions
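
The key behavior in the server change below is the fallback chain: LLM_TIMEOUT wins when it is set, otherwise the existing global TIMEOUT (exposed as args.timeout) is used. A minimal sketch of that resolution order, with a hypothetical resolve_llm_timeout helper standing in for the repo's get_env_value:

import os

def resolve_llm_timeout(global_timeout: int) -> int:
    # Prefer LLM_TIMEOUT; fall back to the global timeout when it is unset.
    raw = os.environ.get("LLM_TIMEOUT")
    return int(raw) if raw is not None else global_timeout

os.environ["LLM_TIMEOUT"] = "150"
print(resolve_llm_timeout(240))  # 150: the LLM-specific value wins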

@@ -127,8 +127,10 @@ MAX_PARALLEL_INSERT=2
 ### LLM Configuration
 ### LLM_BINDING type: openai, ollama, lollms, azure_openai, aws_bedrock
 ###########################################################
-### LLM temperature setting for all llm binding (openai, azure_openai, ollama)
+### LLM temperature and timeout setting for all llm binding (openai, azure_openai, ollama)
 # TEMPERATURE=1.0
+### LLM request timeout setting for all llm (set to TIMEOUT if not specified)
+# LLM_TIMEOUT=150
 ### Some models like o1-mini require temperature to be set to 1, some LLM can fall into output loops with low temperature
 LLM_BINDING=openai

@@ -254,6 +254,8 @@ def create_app(args):
     if args.embedding_binding == "jina":
         from lightrag.llm.jina import jina_embed

+    llm_timeout = get_env_value("LLM_TIMEOUT", args.timeout, int)
+
     async def openai_alike_model_complete(
         prompt,
         system_prompt=None,
@@ -267,12 +269,10 @@ def create_app(args):
         if history_messages is None:
             history_messages = []

-        # Use OpenAI LLM options if available, otherwise fallback to global temperature
-        if args.llm_binding == "openai":
-            openai_options = OpenAILLMOptions.options_dict(args)
-            kwargs.update(openai_options)
-        else:
-            kwargs["temperature"] = args.temperature
+        # Use OpenAI LLM options if available
+        openai_options = OpenAILLMOptions.options_dict(args)
+        kwargs["timeout"] = llm_timeout
+        kwargs.update(openai_options)

         return await openai_complete_if_cache(
             args.llm_model,
@@ -297,12 +297,10 @@ def create_app(args):
         if history_messages is None:
             history_messages = []

-        # Use OpenAI LLM options if available, otherwise fallback to global temperature
-        if args.llm_binding == "azure_openai":
-            openai_options = OpenAILLMOptions.options_dict(args)
-            kwargs.update(openai_options)
-        else:
-            kwargs["temperature"] = args.temperature
+        # Use OpenAI LLM options
+        openai_options = OpenAILLMOptions.options_dict(args)
+        kwargs["timeout"] = llm_timeout
+        kwargs.update(openai_options)

         return await azure_openai_complete_if_cache(
             args.llm_model,
@@ -451,7 +449,7 @@ def create_app(args):
         llm_model_kwargs=(
             {
                 "host": args.llm_binding_host,
-                "timeout": args.timeout,
+                "timeout": llm_timeout,
                 "options": OllamaLLMOptions.options_dict(args),
                 "api_key": args.llm_binding_api_key,
             }
@@ -482,7 +480,7 @@ def create_app(args):
         chunk_token_size=int(args.chunk_size),
         chunk_overlap_token_size=int(args.chunk_overlap_size),
         llm_model_kwargs={
-            "timeout": args.timeout,
+            "timeout": llm_timeout,
         },
         llm_model_name=args.llm_model,
         llm_model_max_async=args.max_async,

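Note the merge order in both wrappers above: the timeout is written into kwargs first and the binding options are applied on top, so an explicit timeout inside OpenAILLMOptions would still take precedence. A rough sketch of that ordering (build_kwargs and the options dict are illustrative stand-ins):

llm_timeout = 150  # resolved from LLM_TIMEOUT / TIMEOUT as above

def build_kwargs(binding_options: dict, **kwargs) -> dict:
    # Timeout first, binding-specific options second: the options can override.
    kwargs["timeout"] = llm_timeout
    kwargs.update(binding_options)
    return kwargs

print(build_kwargs({"temperature": 1.0}))
# {'timeout': 150, 'temperature': 1.0}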
@@ -77,14 +77,18 @@ async def anthropic_complete_if_cache(
     if not VERBOSE_DEBUG and logger.level == logging.DEBUG:
         logging.getLogger("anthropic").setLevel(logging.INFO)

+    kwargs.pop("hashing_kv", None)
+    kwargs.pop("keyword_extraction", None)
+    timeout = kwargs.pop("timeout", None)
+
     anthropic_async_client = (
-        AsyncAnthropic(default_headers=default_headers, api_key=api_key)
+        AsyncAnthropic(default_headers=default_headers, api_key=api_key, timeout=timeout)
         if base_url is None
         else AsyncAnthropic(
-            base_url=base_url, default_headers=default_headers, api_key=api_key
+            base_url=base_url, default_headers=default_headers, api_key=api_key, timeout=timeout
         )
     )

-    kwargs.pop("hashing_kv", None)
     messages: list[dict[str, Any]] = []
     if system_prompt:
         messages.append({"role": "system", "content": system_prompt})

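The popped value is applied at client level: in the anthropic SDK, AsyncAnthropic accepts a timeout in seconds that covers every request the client issues. A minimal sketch with placeholder credentials:

from anthropic import AsyncAnthropic

timeout = 150  # popped from kwargs above; may be None
client = AsyncAnthropic(api_key="sk-ant-...", timeout=timeout)  # placeholder key

Popping timeout and the LightRAG-internal keys before building the client also keeps them out of the kwargs later forwarded with the request.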
@@ -59,13 +59,17 @@ async def azure_openai_complete_if_cache(
         or os.getenv("OPENAI_API_VERSION")
     )
+    kwargs.pop("hashing_kv", None)
+    kwargs.pop("keyword_extraction", None)
+    timeout = kwargs.pop("timeout", None)
+
     openai_async_client = AsyncAzureOpenAI(
         azure_endpoint=base_url,
         azure_deployment=deployment,
         api_key=api_key,
         api_version=api_version,
+        timeout=timeout,
     )

-    kwargs.pop("hashing_kv", None)
     messages = []
     if system_prompt:
         messages.append({"role": "system", "content": system_prompt})

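Azure follows the same pattern, with the timeout passed to the client constructor; AsyncAzureOpenAI in the openai SDK accepts a client-level timeout in seconds. A minimal sketch, where the endpoint and deployment values are hypothetical:

from openai import AsyncAzureOpenAI

client = AsyncAzureOpenAI(
    azure_endpoint="https://example.openai.azure.com",  # hypothetical endpoint
    azure_deployment="gpt-4o",                          # hypothetical deployment
    api_key="...",
    api_version="2024-02-01",
    timeout=150,  # seconds, from the popped kwarg
)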
@@ -51,6 +51,8 @@ async def _ollama_model_if_cache(
     # kwargs.pop("response_format", None) # allow json
     host = kwargs.pop("host", None)
     timeout = kwargs.pop("timeout", None)
+    if timeout == 0:
+        timeout = None
     kwargs.pop("hashing_kv", None)
     api_key = kwargs.pop("api_key", None)
     headers = {

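The new branch gives 0 an "unlimited" meaning: the ollama client hands timeout through to httpx, where None disables the timeout entirely, so a configured 0 becomes "wait indefinitely" rather than "fail immediately". A small sketch of the normalization:

def normalize_timeout(timeout):
    # 0 means "wait indefinitely": httpx treats None as no timeout at all.
    return None if timeout == 0 else timeout

assert normalize_timeout(0) is None
assert normalize_timeout(150) == 150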
@@ -149,17 +149,18 @@ async def openai_complete_if_cache(
     if not VERBOSE_DEBUG and logger.level == logging.DEBUG:
         logging.getLogger("openai").setLevel(logging.INFO)

+    # Remove special kwargs that shouldn't be passed to OpenAI
+    kwargs.pop("hashing_kv", None)
+    kwargs.pop("keyword_extraction", None)
+
     # Extract client configuration options
     client_configs = kwargs.pop("openai_client_configs", {})

     # Create the OpenAI client
     openai_async_client = create_openai_async_client(
-        api_key=api_key, base_url=base_url, client_configs=client_configs
+        api_key=api_key, base_url=base_url, client_configs=client_configs,
     )

-    # Remove special kwargs that shouldn't be passed to OpenAI
-    kwargs.pop("hashing_kv", None)
-    kwargs.pop("keyword_extraction", None)

     # Prepare messages
     messages: list[dict[str, Any]] = []
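
Unlike the other bindings, this change only moves the pops of the LightRAG-internal keys ahead of client creation; timeout is presumably left in kwargs on purpose, since the openai SDK also accepts a per-request timeout on chat.completions.create(). A minimal sketch of that forwarding, where the model name is a hypothetical choice:

from openai import AsyncOpenAI

async def complete(client: AsyncOpenAI, prompt: str, **kwargs):
    kwargs.pop("hashing_kv", None)          # LightRAG-internal, not an API parameter
    kwargs.pop("keyword_extraction", None)  # LightRAG-internal, not an API parameter
    return await client.chat.completions.create(
        model="gpt-4o-mini",  # hypothetical
        messages=[{"role": "user", "content": prompt}],
        **kwargs,  # may carry timeout=150 through to this request
    )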