Set default TIMEOUT value to 150, and gunicorn timeout to TIMEOUT+30
This commit is contained in:
parent
9b7ed84e05
commit
4c556d8aae
5 changed files with 6 additions and 6 deletions
|
|
@ -8,6 +8,8 @@ PORT=9621
|
|||
WEBUI_TITLE='My Graph KB'
|
||||
WEBUI_DESCRIPTION="Simple and Fast Graph Based RAG System"
|
||||
# WORKERS=2
|
||||
### gunicorn worker timeout (as default LLM request timeout if LLM_TIMEOUT is not set)
|
||||
# TIMEOUT=150
|
||||
# CORS_ORIGINS=http://localhost:3000,http://localhost:8080
|
||||
|
||||
### Optional SSL Configuration
|
||||
|
|
@ -151,8 +153,6 @@ LLM_BINDING_API_KEY=your_api_key
|
|||
### lightrag-server --llm-binding openai --help
|
||||
|
||||
### Ollama Server Specific Parameters
|
||||
### Timeout in seconds, None for infinite timeout
|
||||
TIMEOUT=240
|
||||
### OLLAMA_LLM_NUM_CTX must be larger than MAX_TOTAL_TOKENS + 2000
|
||||
OLLAMA_LLM_NUM_CTX=32768
|
||||
### Stop sequences for Ollama LLM
|
||||
|
|
|
|||
|
|
@ -478,7 +478,7 @@ SUMMARY_LANGUAGE=Chinese
|
|||
MAX_PARALLEL_INSERT=2
|
||||
|
||||
### LLM Configuration (Use valid host. For local services installed with docker, you can use host.docker.internal)
|
||||
TIMEOUT=200
|
||||
TIMEOUT=150
|
||||
MAX_ASYNC=4
|
||||
|
||||
LLM_BINDING=openai
|
||||
|
|
|
|||
|
|
@ -485,7 +485,7 @@ SUMMARY_LANGUAGE=Chinese
|
|||
MAX_PARALLEL_INSERT=2
|
||||
|
||||
### LLM Configuration (Use valid host. For local services installed with docker, you can use host.docker.internal)
|
||||
TIMEOUT=200
|
||||
TIMEOUT=150
|
||||
MAX_ASYNC=4
|
||||
|
||||
LLM_BINDING=openai
|
||||
|
|
|
|||
|
|
@ -153,7 +153,7 @@ def main():
|
|||
|
||||
# Timeout configuration prioritizes command line arguments
|
||||
gunicorn_config.timeout = (
|
||||
global_args.timeout * 2
|
||||
global_args.timeout + 30
|
||||
if global_args.timeout is not None
|
||||
else get_env_value(
|
||||
"TIMEOUT", DEFAULT_TIMEOUT + 30, int, special_none=True
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@ DEFAULT_MAX_PARALLEL_INSERT = 2 # Default maximum parallel insert operations
|
|||
DEFAULT_EMBEDDING_FUNC_MAX_ASYNC = 8 # Default max async for embedding functions
|
||||
DEFAULT_EMBEDDING_BATCH_NUM = 10 # Default batch size for embedding computations
|
||||
|
||||
# Ollama Server Timeout in seconds
|
||||
# gunicorn worker timeout (as default LLM request timeout if LLM_TIMEOUT is not set)
|
||||
DEFAULT_TIMEOUT = 150
|
||||
|
||||
# Logging configuration defaults
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue