Add LLM_TIMEOUT configuration for all LLM providers

- Add LLM_TIMEOUT env variable
- Apply timeout to all LLM bindings
This commit is contained in:
yangdx 2025-08-20 23:50:57 +08:00
parent 4c556d8aae
commit df7bcb1e3d
6 changed files with 34 additions and 23 deletions

View file

@@ -127,8 +127,10 @@ MAX_PARALLEL_INSERT=2
### LLM Configuration ### LLM Configuration
### LLM_BINDING type: openai, ollama, lollms, azure_openai, aws_bedrock ### LLM_BINDING type: openai, ollama, lollms, azure_openai, aws_bedrock
########################################################### ###########################################################
### LLM temperature setting for all llm binding (openai, azure_openai, ollama) ### LLM temperature and timeout setting for all llm binding (openai, azure_openai, ollama)
# TEMPERATURE=1.0 # TEMPERATURE=1.0
### LLM request timeout setting for all llm (set to TIMEOUT if not specified)
# LLM_TIMEOUT=150
### Some models like o1-mini require temperature to be set to 1, some LLM can fall into output loops with low temperature ### Some models like o1-mini require temperature to be set to 1, some LLM can fall into output loops with low temperature
LLM_BINDING=openai LLM_BINDING=openai

View file

@@ -254,6 +254,8 @@ def create_app(args):
if args.embedding_binding == "jina": if args.embedding_binding == "jina":
from lightrag.llm.jina import jina_embed from lightrag.llm.jina import jina_embed
llm_timeout = get_env_value("LLM_TIMEOUT", args.timeout, int)
async def openai_alike_model_complete( async def openai_alike_model_complete(
prompt, prompt,
system_prompt=None, system_prompt=None,
@@ -267,12 +269,10 @@ def create_app(args):
if history_messages is None: if history_messages is None:
history_messages = [] history_messages = []
# Use OpenAI LLM options if available, otherwise fallback to global temperature # Use OpenAI LLM options if available
if args.llm_binding == "openai": openai_options = OpenAILLMOptions.options_dict(args)
openai_options = OpenAILLMOptions.options_dict(args) kwargs["timeout"] = llm_timeout
kwargs.update(openai_options) kwargs.update(openai_options)
else:
kwargs["temperature"] = args.temperature
return await openai_complete_if_cache( return await openai_complete_if_cache(
args.llm_model, args.llm_model,
@@ -297,12 +297,10 @@ def create_app(args):
if history_messages is None: if history_messages is None:
history_messages = [] history_messages = []
# Use OpenAI LLM options if available, otherwise fallback to global temperature # Use OpenAI LLM options
if args.llm_binding == "azure_openai": openai_options = OpenAILLMOptions.options_dict(args)
openai_options = OpenAILLMOptions.options_dict(args) kwargs["timeout"] = llm_timeout
kwargs.update(openai_options) kwargs.update(openai_options)
else:
kwargs["temperature"] = args.temperature
return await azure_openai_complete_if_cache( return await azure_openai_complete_if_cache(
args.llm_model, args.llm_model,
@@ -451,7 +449,7 @@ def create_app(args):
llm_model_kwargs=( llm_model_kwargs=(
{ {
"host": args.llm_binding_host, "host": args.llm_binding_host,
"timeout": args.timeout, "timeout": llm_timeout,
"options": OllamaLLMOptions.options_dict(args), "options": OllamaLLMOptions.options_dict(args),
"api_key": args.llm_binding_api_key, "api_key": args.llm_binding_api_key,
} }
@@ -482,7 +480,7 @@ def create_app(args):
chunk_token_size=int(args.chunk_size), chunk_token_size=int(args.chunk_size),
chunk_overlap_token_size=int(args.chunk_overlap_size), chunk_overlap_token_size=int(args.chunk_overlap_size),
llm_model_kwargs={ llm_model_kwargs={
"timeout": args.timeout, "timeout": llm_timeout,
}, },
llm_model_name=args.llm_model, llm_model_name=args.llm_model,
llm_model_max_async=args.max_async, llm_model_max_async=args.max_async,

View file

@@ -77,14 +77,18 @@ async def anthropic_complete_if_cache(
if not VERBOSE_DEBUG and logger.level == logging.DEBUG: if not VERBOSE_DEBUG and logger.level == logging.DEBUG:
logging.getLogger("anthropic").setLevel(logging.INFO) logging.getLogger("anthropic").setLevel(logging.INFO)
kwargs.pop("hashing_kv", None)
kwargs.pop("keyword_extraction", None)
timeout = kwargs.pop("timeout", None)
anthropic_async_client = ( anthropic_async_client = (
AsyncAnthropic(default_headers=default_headers, api_key=api_key) AsyncAnthropic(default_headers=default_headers, api_key=api_key, timeout=timeout)
if base_url is None if base_url is None
else AsyncAnthropic( else AsyncAnthropic(
base_url=base_url, default_headers=default_headers, api_key=api_key base_url=base_url, default_headers=default_headers, api_key=api_key, timeout=timeout
) )
) )
kwargs.pop("hashing_kv", None)
messages: list[dict[str, Any]] = [] messages: list[dict[str, Any]] = []
if system_prompt: if system_prompt:
messages.append({"role": "system", "content": system_prompt}) messages.append({"role": "system", "content": system_prompt})

View file

@@ -59,13 +59,17 @@ async def azure_openai_complete_if_cache(
or os.getenv("OPENAI_API_VERSION") or os.getenv("OPENAI_API_VERSION")
) )
kwargs.pop("hashing_kv", None)
kwargs.pop("keyword_extraction", None)
timeout = kwargs.pop("timeout", None)
openai_async_client = AsyncAzureOpenAI( openai_async_client = AsyncAzureOpenAI(
azure_endpoint=base_url, azure_endpoint=base_url,
azure_deployment=deployment, azure_deployment=deployment,
api_key=api_key, api_key=api_key,
api_version=api_version, api_version=api_version,
timeout=timeout,
) )
kwargs.pop("hashing_kv", None)
messages = [] messages = []
if system_prompt: if system_prompt:
messages.append({"role": "system", "content": system_prompt}) messages.append({"role": "system", "content": system_prompt})

View file

@@ -51,6 +51,8 @@ async def _ollama_model_if_cache(
# kwargs.pop("response_format", None) # allow json # kwargs.pop("response_format", None) # allow json
host = kwargs.pop("host", None) host = kwargs.pop("host", None)
timeout = kwargs.pop("timeout", None) timeout = kwargs.pop("timeout", None)
if timeout == 0:
timeout = None
kwargs.pop("hashing_kv", None) kwargs.pop("hashing_kv", None)
api_key = kwargs.pop("api_key", None) api_key = kwargs.pop("api_key", None)
headers = { headers = {

View file

@@ -149,17 +149,18 @@ async def openai_complete_if_cache(
if not VERBOSE_DEBUG and logger.level == logging.DEBUG: if not VERBOSE_DEBUG and logger.level == logging.DEBUG:
logging.getLogger("openai").setLevel(logging.INFO) logging.getLogger("openai").setLevel(logging.INFO)
# Remove special kwargs that shouldn't be passed to OpenAI
kwargs.pop("hashing_kv", None)
kwargs.pop("keyword_extraction", None)
# Extract client configuration options # Extract client configuration options
client_configs = kwargs.pop("openai_client_configs", {}) client_configs = kwargs.pop("openai_client_configs", {})
# Create the OpenAI client # Create the OpenAI client
openai_async_client = create_openai_async_client( openai_async_client = create_openai_async_client(
api_key=api_key, base_url=base_url, client_configs=client_configs api_key=api_key, base_url=base_url, client_configs=client_configs,
) )
# Remove special kwargs that shouldn't be passed to OpenAI
kwargs.pop("hashing_kv", None)
kwargs.pop("keyword_extraction", None)
# Prepare messages # Prepare messages
messages: list[dict[str, Any]] = [] messages: list[dict[str, Any]] = []