diff --git a/env.example b/env.example
index db590761..56bc243e 100644
--- a/env.example
+++ b/env.example
@@ -127,8 +127,10 @@ MAX_PARALLEL_INSERT=2
 ### LLM Configuration ###
 ### LLM_BINDING type: openai, ollama, lollms, azure_openai, aws_bedrock
 ###########################################################
-### LLM temperature setting for all llm binding (openai, azure_openai, ollama)
+### LLM temperature and timeout setting for all llm binding (openai, azure_openai, ollama)
 # TEMPERATURE=1.0
+### LLM request timeout setting for all llm (set to TIMEOUT if not specified)
+# LLM_TIMEOUT=150
 ### Some models like o1-mini require temperature to be set to 1, some LLM can fall into output loops with low temperature
 
 LLM_BINDING=openai
diff --git a/lightrag/api/lightrag_server.py b/lightrag/api/lightrag_server.py
index c3384181..e84686cb 100644
--- a/lightrag/api/lightrag_server.py
+++ b/lightrag/api/lightrag_server.py
@@ -254,6 +254,8 @@ def create_app(args):
     if args.embedding_binding == "jina":
         from lightrag.llm.jina import jina_embed
 
+    llm_timeout = get_env_value("LLM_TIMEOUT", args.timeout, int)
+
     async def openai_alike_model_complete(
         prompt,
         system_prompt=None,
@@ -267,12 +269,10 @@ def create_app(args):
         if history_messages is None:
             history_messages = []
 
-        # Use OpenAI LLM options if available, otherwise fallback to global temperature
-        if args.llm_binding == "openai":
-            openai_options = OpenAILLMOptions.options_dict(args)
-            kwargs.update(openai_options)
-        else:
-            kwargs["temperature"] = args.temperature
+        # Use OpenAI LLM options if available
+        openai_options = OpenAILLMOptions.options_dict(args)
+        kwargs["timeout"] = llm_timeout
+        kwargs.update(openai_options)
 
         return await openai_complete_if_cache(
             args.llm_model,
@@ -297,12 +297,10 @@ def create_app(args):
         if history_messages is None:
             history_messages = []
 
-        # Use OpenAI LLM options if available, otherwise fallback to global temperature
-        if args.llm_binding == "azure_openai":
-            openai_options = OpenAILLMOptions.options_dict(args)
-            kwargs.update(openai_options)
-        else:
-            kwargs["temperature"] = args.temperature
+        # Use OpenAI LLM options
+        openai_options = OpenAILLMOptions.options_dict(args)
+        kwargs["timeout"] = llm_timeout
+        kwargs.update(openai_options)
 
         return await azure_openai_complete_if_cache(
             args.llm_model,
@@ -451,7 +449,7 @@ def create_app(args):
             llm_model_kwargs=(
                 {
                     "host": args.llm_binding_host,
-                    "timeout": args.timeout,
+                    "timeout": llm_timeout,
                     "options": OllamaLLMOptions.options_dict(args),
                     "api_key": args.llm_binding_api_key,
                 }
@@ -482,7 +480,7 @@ def create_app(args):
             chunk_token_size=int(args.chunk_size),
             chunk_overlap_token_size=int(args.chunk_overlap_size),
             llm_model_kwargs={
-                "timeout": args.timeout,
+                "timeout": llm_timeout,
             },
             llm_model_name=args.llm_model,
             llm_model_max_async=args.max_async,
diff --git a/lightrag/llm/anthropic.py b/lightrag/llm/anthropic.py
index 7878c8f0..b7a7dfaa 100644
--- a/lightrag/llm/anthropic.py
+++ b/lightrag/llm/anthropic.py
@@ -77,14 +77,18 @@ async def anthropic_complete_if_cache(
     if not VERBOSE_DEBUG and logger.level == logging.DEBUG:
         logging.getLogger("anthropic").setLevel(logging.INFO)
 
+    kwargs.pop("hashing_kv", None)
+    kwargs.pop("keyword_extraction", None)
+    timeout = kwargs.pop("timeout", None)
+
     anthropic_async_client = (
-        AsyncAnthropic(default_headers=default_headers, api_key=api_key)
+        AsyncAnthropic(default_headers=default_headers, api_key=api_key, timeout=timeout)
         if base_url is None
         else AsyncAnthropic(
-            base_url=base_url, default_headers=default_headers, api_key=api_key
+            base_url=base_url, default_headers=default_headers, api_key=api_key, timeout=timeout
         )
     )
-    kwargs.pop("hashing_kv", None)
+
     messages: list[dict[str, Any]] = []
     if system_prompt:
         messages.append({"role": "system", "content": system_prompt})
diff --git a/lightrag/llm/azure_openai.py b/lightrag/llm/azure_openai.py
index 60d2c18e..adec391a 100644
--- a/lightrag/llm/azure_openai.py
+++ b/lightrag/llm/azure_openai.py
@@ -59,13 +59,17 @@ async def azure_openai_complete_if_cache(
         or os.getenv("OPENAI_API_VERSION")
     )
 
+    kwargs.pop("hashing_kv", None)
+    kwargs.pop("keyword_extraction", None)
+    timeout = kwargs.pop("timeout", None)
+
     openai_async_client = AsyncAzureOpenAI(
         azure_endpoint=base_url,
         azure_deployment=deployment,
         api_key=api_key,
         api_version=api_version,
+        timeout=timeout,
     )
-    kwargs.pop("hashing_kv", None)
 
     messages = []
     if system_prompt:
         messages.append({"role": "system", "content": system_prompt})
diff --git a/lightrag/llm/ollama.py b/lightrag/llm/ollama.py
index 1ca5504e..6423fa90 100644
--- a/lightrag/llm/ollama.py
+++ b/lightrag/llm/ollama.py
@@ -51,6 +51,8 @@ async def _ollama_model_if_cache(
     # kwargs.pop("response_format", None)  # allow json
     host = kwargs.pop("host", None)
     timeout = kwargs.pop("timeout", None)
+    if timeout == 0:
+        timeout = None
     kwargs.pop("hashing_kv", None)
     api_key = kwargs.pop("api_key", None)
     headers = {
diff --git a/lightrag/llm/openai.py b/lightrag/llm/openai.py
index 910d1812..f920e392 100644
--- a/lightrag/llm/openai.py
+++ b/lightrag/llm/openai.py
@@ -149,17 +149,18 @@ async def openai_complete_if_cache(
     if not VERBOSE_DEBUG and logger.level == logging.DEBUG:
         logging.getLogger("openai").setLevel(logging.INFO)
 
+    # Remove special kwargs that shouldn't be passed to OpenAI
+    kwargs.pop("hashing_kv", None)
+    kwargs.pop("keyword_extraction", None)
+
     # Extract client configuration options
     client_configs = kwargs.pop("openai_client_configs", {})
 
     # Create the OpenAI client
     openai_async_client = create_openai_async_client(
-        api_key=api_key, base_url=base_url, client_configs=client_configs
+        api_key=api_key, base_url=base_url, client_configs=client_configs,
     )
 
-    # Remove special kwargs that shouldn't be passed to OpenAI
-    kwargs.pop("hashing_kv", None)
-    kwargs.pop("keyword_extraction", None)
-
     # Prepare messages
     messages: list[dict[str, Any]] = []
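
For context, the timeout resolution this patch introduces can be summarized with a minimal sketch. It assumes get_env_value behaves like a typed os.environ lookup with a fallback default; the helper below is a simplified stand-in for illustration, not the repo's implementation, and the 150-second default is illustrative (taken from the env.example comment):

    import os

    def get_env_value(key, default, cast=str):
        # Simplified stand-in: read an env var, cast it,
        # and fall back to the given default when unset or invalid.
        raw = os.environ.get(key)
        if raw is None:
            return default
        try:
            return cast(raw)
        except (TypeError, ValueError):
            return default

    # Resolution order per this patch:
    #   LLM_TIMEOUT (env) -> args.timeout (the existing global TIMEOUT)
    global_timeout = get_env_value("TIMEOUT", 150, int)   # illustrative default
    llm_timeout = get_env_value("LLM_TIMEOUT", global_timeout, int)

    # Ollama additionally treats 0 as "no timeout", matching the new
    # guard added in lightrag/llm/ollama.py:
    if llm_timeout == 0:
        llm_timeout = None

Each binding then pops the timeout kwarg and hands it to its underlying client (AsyncAnthropic, AsyncAzureOpenAI, or the Ollama client), so a single environment variable can now govern LLM request timeouts across bindings instead of reusing the global TIMEOUT unconditionally.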