Add LLM_TIMEOUT configuration for all LLM providers

- Add LLM_TIMEOUT env variable
- Apply timeout to all LLM bindings
yangdx 2025-08-20 23:50:57 +08:00
parent 4c556d8aae
commit df7bcb1e3d
6 changed files with 34 additions and 23 deletions
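
The key behavior in the server change below is the fallback chain: LLM_TIMEOUT wins when it is set, otherwise the existing global TIMEOUT (exposed as args.timeout) is used. A minimal sketch of that resolution order, with a hypothetical resolve_llm_timeout helper standing in for the repo's get_env_value:

import os

def resolve_llm_timeout(global_timeout: int) -> int:
    # Prefer LLM_TIMEOUT; fall back to the global timeout when it is unset.
    raw = os.environ.get("LLM_TIMEOUT")
    return int(raw) if raw is not None else global_timeout

os.environ["LLM_TIMEOUT"] = "150"
print(resolve_llm_timeout(240))  # 150: the LLM-specific value wins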

@@ -127,8 +127,10 @@ MAX_PARALLEL_INSERT=2
 ### LLM Configuration
 ### LLM_BINDING type: openai, ollama, lollms, azure_openai, aws_bedrock
 ###########################################################
-### LLM temperature setting for all llm binding (openai, azure_openai, ollama)
+### LLM temperature and timeout setting for all llm binding (openai, azure_openai, ollama)
 # TEMPERATURE=1.0
+### LLM request timeout setting for all llm (set to TIMEOUT if not specified)
+# LLM_TIMEOUT=150
 ### Some models like o1-mini require temperature to be set to 1, some LLM can fall into output loops with low temperature
 LLM_BINDING=openai

@@ -254,6 +254,8 @@ def create_app(args):
     if args.embedding_binding == "jina":
         from lightrag.llm.jina import jina_embed

+    llm_timeout = get_env_value("LLM_TIMEOUT", args.timeout, int)
+
     async def openai_alike_model_complete(
         prompt,
         system_prompt=None,
@@ -267,12 +269,10 @@ def create_app(args):
         if history_messages is None:
             history_messages = []

-        # Use OpenAI LLM options if available, otherwise fallback to global temperature
-        if args.llm_binding == "openai":
-            openai_options = OpenAILLMOptions.options_dict(args)
-            kwargs.update(openai_options)
-        else:
-            kwargs["temperature"] = args.temperature
+        # Use OpenAI LLM options if available
+        openai_options = OpenAILLMOptions.options_dict(args)
+        kwargs["timeout"] = llm_timeout
+        kwargs.update(openai_options)

         return await openai_complete_if_cache(
             args.llm_model,
@@ -297,12 +297,10 @@ def create_app(args):
         if history_messages is None:
             history_messages = []

-        # Use OpenAI LLM options if available, otherwise fallback to global temperature
-        if args.llm_binding == "azure_openai":
-            openai_options = OpenAILLMOptions.options_dict(args)
-            kwargs.update(openai_options)
-        else:
-            kwargs["temperature"] = args.temperature
+        # Use OpenAI LLM options
+        openai_options = OpenAILLMOptions.options_dict(args)
+        kwargs["timeout"] = llm_timeout
+        kwargs.update(openai_options)

         return await azure_openai_complete_if_cache(
             args.llm_model,
@@ -451,7 +449,7 @@ def create_app(args):
         llm_model_kwargs=(
             {
                 "host": args.llm_binding_host,
-                "timeout": args.timeout,
+                "timeout": llm_timeout,
                 "options": OllamaLLMOptions.options_dict(args),
                 "api_key": args.llm_binding_api_key,
             }
@@ -482,7 +480,7 @@ def create_app(args):
         chunk_token_size=int(args.chunk_size),
         chunk_overlap_token_size=int(args.chunk_overlap_size),
         llm_model_kwargs={
-            "timeout": args.timeout,
+            "timeout": llm_timeout,
         },
         llm_model_name=args.llm_model,
         llm_model_max_async=args.max_async,

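Note the merge order in both wrappers above: the timeout is written into kwargs first and the binding options are applied on top, so an explicit timeout inside OpenAILLMOptions would still take precedence. A rough sketch of that ordering (build_kwargs and the options dict are illustrative stand-ins):

llm_timeout = 150  # resolved from LLM_TIMEOUT / TIMEOUT as above

def build_kwargs(binding_options: dict, **kwargs) -> dict:
    # Timeout first, binding-specific options second: the options can override.
    kwargs["timeout"] = llm_timeout
    kwargs.update(binding_options)
    return kwargs

print(build_kwargs({"temperature": 1.0}))
# {'timeout': 150, 'temperature': 1.0}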
@@ -77,14 +77,18 @@ async def anthropic_complete_if_cache(
     if not VERBOSE_DEBUG and logger.level == logging.DEBUG:
         logging.getLogger("anthropic").setLevel(logging.INFO)

+    kwargs.pop("hashing_kv", None)
+    kwargs.pop("keyword_extraction", None)
+    timeout = kwargs.pop("timeout", None)
+
     anthropic_async_client = (
-        AsyncAnthropic(default_headers=default_headers, api_key=api_key)
+        AsyncAnthropic(default_headers=default_headers, api_key=api_key, timeout=timeout)
         if base_url is None
         else AsyncAnthropic(
-            base_url=base_url, default_headers=default_headers, api_key=api_key
+            base_url=base_url, default_headers=default_headers, api_key=api_key, timeout=timeout
         )
     )

-    kwargs.pop("hashing_kv", None)
     messages: list[dict[str, Any]] = []
     if system_prompt:
         messages.append({"role": "system", "content": system_prompt})

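The popped value is applied at client level: in the anthropic SDK, AsyncAnthropic accepts a timeout in seconds that covers every request the client issues. A minimal sketch with placeholder credentials:

from anthropic import AsyncAnthropic

timeout = 150  # popped from kwargs above; may be None
client = AsyncAnthropic(api_key="sk-ant-...", timeout=timeout)  # placeholder key

Popping timeout and the LightRAG-internal keys before building the client also keeps them out of the kwargs later forwarded with the request.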
@@ -59,13 +59,17 @@ async def azure_openai_complete_if_cache(
         or os.getenv("OPENAI_API_VERSION")
     )
+    kwargs.pop("hashing_kv", None)
+    kwargs.pop("keyword_extraction", None)
+    timeout = kwargs.pop("timeout", None)
+
     openai_async_client = AsyncAzureOpenAI(
         azure_endpoint=base_url,
         azure_deployment=deployment,
         api_key=api_key,
         api_version=api_version,
+        timeout=timeout,
     )

-    kwargs.pop("hashing_kv", None)
     messages = []
     if system_prompt:
         messages.append({"role": "system", "content": system_prompt})

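Azure follows the same pattern, with the timeout passed to the client constructor; AsyncAzureOpenAI in the openai SDK accepts a client-level timeout in seconds. A minimal sketch, where the endpoint and deployment values are hypothetical:

from openai import AsyncAzureOpenAI

client = AsyncAzureOpenAI(
    azure_endpoint="https://example.openai.azure.com",  # hypothetical endpoint
    azure_deployment="gpt-4o",                          # hypothetical deployment
    api_key="...",
    api_version="2024-02-01",
    timeout=150,  # seconds, from the popped kwarg
)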
@@ -51,6 +51,8 @@ async def _ollama_model_if_cache(
     # kwargs.pop("response_format", None) # allow json
     host = kwargs.pop("host", None)
     timeout = kwargs.pop("timeout", None)
+    if timeout == 0:
+        timeout = None
     kwargs.pop("hashing_kv", None)
     api_key = kwargs.pop("api_key", None)
     headers = {

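The new branch gives 0 an "unlimited" meaning: the ollama client hands timeout through to httpx, where None disables the timeout entirely, so a configured 0 becomes "wait indefinitely" rather than "fail immediately". A small sketch of the normalization:

def normalize_timeout(timeout):
    # 0 means "wait indefinitely": httpx treats None as no timeout at all.
    return None if timeout == 0 else timeout

assert normalize_timeout(0) is None
assert normalize_timeout(150) == 150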
@@ -149,17 +149,18 @@ async def openai_complete_if_cache(
     if not VERBOSE_DEBUG and logger.level == logging.DEBUG:
         logging.getLogger("openai").setLevel(logging.INFO)

+    # Remove special kwargs that shouldn't be passed to OpenAI
+    kwargs.pop("hashing_kv", None)
+    kwargs.pop("keyword_extraction", None)
+
     # Extract client configuration options
     client_configs = kwargs.pop("openai_client_configs", {})

     # Create the OpenAI client
     openai_async_client = create_openai_async_client(
-        api_key=api_key, base_url=base_url, client_configs=client_configs
+        api_key=api_key, base_url=base_url, client_configs=client_configs,
     )

-    # Remove special kwargs that shouldn't be passed to OpenAI
-    kwargs.pop("hashing_kv", None)
-    kwargs.pop("keyword_extraction", None)

     # Prepare messages
     messages: list[dict[str, Any]] = []
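
Unlike the other bindings, this change only moves the pops of the LightRAG-internal keys ahead of client creation; timeout is presumably left in kwargs on purpose, since the openai SDK also accepts a per-request timeout on chat.completions.create(). A minimal sketch of that forwarding, where the model name is a hypothetical choice:

from openai import AsyncOpenAI

async def complete(client: AsyncOpenAI, prompt: str, **kwargs):
    kwargs.pop("hashing_kv", None)          # LightRAG-internal, not an API parameter
    kwargs.pop("keyword_extraction", None)  # LightRAG-internal, not an API parameter
    return await client.chat.completions.create(
        model="gpt-4o-mini",  # hypothetical
        messages=[{"role": "user", "content": prompt}],
        **kwargs,  # may carry timeout=150 through to this request
    )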