Add LLM_TIMEOUT configuration for all LLM providers

- Add LLM_TIMEOUT env variable
- Apply timeout to all LLM bindings
parent 4c556d8aae
commit df7bcb1e3d

6 changed files with 34 additions and 23 deletions
env.example:

```diff
@@ -127,8 +127,10 @@ MAX_PARALLEL_INSERT=2
 ### LLM Configuration
 ### LLM_BINDING type: openai, ollama, lollms, azure_openai, aws_bedrock
 ###########################################################
-### LLM temperature setting for all llm binding (openai, azure_openai, ollama)
+### LLM temperature and timeout setting for all llm binding (openai, azure_openai, ollama)
 # TEMPERATURE=1.0
+### LLM request timeout setting for all llm (set to TIMEOUT if not specified)
+# LLM_TIMEOUT=150
 ### Some models like o1-mini require temperature to be set to 1, some LLM can fall into output loops with low temperature

 LLM_BINDING=openai
```
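In practice the new variable reads as: prefer `LLM_TIMEOUT`, fall back to the general `TIMEOUT`. A minimal sketch of that fallback (the helper name and default are illustrative, not from the codebase):

```python
import os

# Hypothetical helper, not part of LightRAG: prefer LLM_TIMEOUT,
# fall back to TIMEOUT, else use a hard default (in seconds).
def resolve_llm_timeout(default: int = 150) -> int:
    raw = os.getenv("LLM_TIMEOUT") or os.getenv("TIMEOUT")
    return int(raw) if raw else default
```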
lightrag/api/lightrag_server.py:

```diff
@@ -254,6 +254,8 @@ def create_app(args):
     if args.embedding_binding == "jina":
         from lightrag.llm.jina import jina_embed

+    llm_timeout = get_env_value("LLM_TIMEOUT", args.timeout, int)
+
     async def openai_alike_model_complete(
         prompt,
         system_prompt=None,
```
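`get_env_value("LLM_TIMEOUT", args.timeout, int)` converts the variable to an integer and falls back to `args.timeout`, which carries the general `TIMEOUT` setting, matching the env comment's "set to TIMEOUT if not specified". The resulting `llm_timeout` is the single value handed to every binding below.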
```diff
@@ -267,12 +269,10 @@ def create_app(args):
         if history_messages is None:
             history_messages = []

-        # Use OpenAI LLM options if available, otherwise fallback to global temperature
-        if args.llm_binding == "openai":
-            openai_options = OpenAILLMOptions.options_dict(args)
-            kwargs.update(openai_options)
-        else:
-            kwargs["temperature"] = args.temperature
+        # Use OpenAI LLM options if available
+        openai_options = OpenAILLMOptions.options_dict(args)
+        kwargs["timeout"] = llm_timeout
+        kwargs.update(openai_options)

         return await openai_complete_if_cache(
             args.llm_model,
```
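Note the ordering: `kwargs["timeout"]` is assigned before `kwargs.update(openai_options)`, so a timeout supplied through `OpenAILLMOptions` would override `LLM_TIMEOUT` for this binding. The same pattern appears in the Azure handler below. The refactor also removes the explicit per-binding `temperature` fallback; the options dict is now applied unconditionally.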
```diff
@@ -297,12 +297,10 @@ def create_app(args):
         if history_messages is None:
             history_messages = []

-        # Use OpenAI LLM options if available, otherwise fallback to global temperature
-        if args.llm_binding == "azure_openai":
-            openai_options = OpenAILLMOptions.options_dict(args)
-            kwargs.update(openai_options)
-        else:
-            kwargs["temperature"] = args.temperature
+        # Use OpenAI LLM options
+        openai_options = OpenAILLMOptions.options_dict(args)
+        kwargs["timeout"] = llm_timeout
+        kwargs.update(openai_options)

         return await azure_openai_complete_if_cache(
             args.llm_model,
```
```diff
@@ -451,7 +449,7 @@ def create_app(args):
             llm_model_kwargs=(
                 {
                     "host": args.llm_binding_host,
-                    "timeout": args.timeout,
+                    "timeout": llm_timeout,
                     "options": OllamaLLMOptions.options_dict(args),
                     "api_key": args.llm_binding_api_key,
                 }
```
```diff
@@ -482,7 +480,7 @@ def create_app(args):
             chunk_token_size=int(args.chunk_size),
             chunk_overlap_token_size=int(args.chunk_overlap_size),
             llm_model_kwargs={
-                "timeout": args.timeout,
+                "timeout": llm_timeout,
             },
             llm_model_name=args.llm_model,
             llm_model_max_async=args.max_async,
```
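For orientation, `llm_model_kwargs` is merged into each model call, which is how the `timeout` value reaches the bindings patched below; each binding then pops it before calling its SDK. A toy sketch of that pattern (names hypothetical):

```python
import asyncio

async def toy_binding(prompt: str, **kwargs) -> str:
    # Mirrors the pops in the binding diffs below: "timeout" is removed
    # from kwargs before the remaining kwargs go to the SDK call.
    timeout = kwargs.pop("timeout", None)
    return f"{prompt!r} handled with timeout={timeout}, extra={kwargs}"

async def main() -> None:
    llm_model_kwargs = {"timeout": 150}
    print(await toy_binding("ping", **llm_model_kwargs))

asyncio.run(main())
```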
lightrag/llm/anthropic.py:

```diff
@@ -77,14 +77,18 @@ async def anthropic_complete_if_cache(
     if not VERBOSE_DEBUG and logger.level == logging.DEBUG:
         logging.getLogger("anthropic").setLevel(logging.INFO)

+    kwargs.pop("hashing_kv", None)
+    kwargs.pop("keyword_extraction", None)
+    timeout = kwargs.pop("timeout", None)
+
     anthropic_async_client = (
-        AsyncAnthropic(default_headers=default_headers, api_key=api_key)
+        AsyncAnthropic(default_headers=default_headers, api_key=api_key, timeout=timeout)
         if base_url is None
         else AsyncAnthropic(
-            base_url=base_url, default_headers=default_headers, api_key=api_key
+            base_url=base_url, default_headers=default_headers, api_key=api_key, timeout=timeout
         )
     )
-    kwargs.pop("hashing_kv", None)

     messages: list[dict[str, Any]] = []
     if system_prompt:
         messages.append({"role": "system", "content": system_prompt})
```
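The Anthropic SDK accepts a client-level `timeout`, which then applies to every request made through that client. A hedged usage sketch (API key and model name are placeholders):

```python
import asyncio
from anthropic import AsyncAnthropic

async def main() -> None:
    # Client-level timeout, as in the diff above; applies to all requests.
    client = AsyncAnthropic(api_key="sk-ant-...", timeout=150)
    msg = await client.messages.create(
        model="claude-3-5-haiku-latest",  # placeholder model name
        max_tokens=64,
        messages=[{"role": "user", "content": "ping"}],
    )
    print(msg.content[0].text)

asyncio.run(main())
```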
lightrag/llm/azure_openai.py:

```diff
@@ -59,13 +59,17 @@ async def azure_openai_complete_if_cache(
         or os.getenv("OPENAI_API_VERSION")
     )

+    kwargs.pop("hashing_kv", None)
+    kwargs.pop("keyword_extraction", None)
+    timeout = kwargs.pop("timeout", None)
+
     openai_async_client = AsyncAzureOpenAI(
         azure_endpoint=base_url,
         azure_deployment=deployment,
         api_key=api_key,
         api_version=api_version,
+        timeout=timeout,
     )
-    kwargs.pop("hashing_kv", None)
     messages = []
     if system_prompt:
         messages.append({"role": "system", "content": system_prompt})
```
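Popping `timeout` before constructing the client keeps it out of the later completion call; it becomes a client-level setting instead. A sketch with placeholder endpoint, deployment, key, and API version:

```python
import asyncio
from openai import AsyncAzureOpenAI

async def main() -> None:
    client = AsyncAzureOpenAI(
        azure_endpoint="https://example.openai.azure.com",  # placeholder
        azure_deployment="gpt-4o-mini",                     # placeholder
        api_key="...",                                      # placeholder
        api_version="2024-08-01-preview",                   # placeholder
        timeout=150,  # client-level timeout, as in the diff above
    )
    resp = await client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": "ping"}],
    )
    print(resp.choices[0].message.content)

asyncio.run(main())
```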
lightrag/llm/ollama.py:

```diff
@@ -51,6 +51,8 @@ async def _ollama_model_if_cache(
     # kwargs.pop("response_format", None) # allow json
     host = kwargs.pop("host", None)
     timeout = kwargs.pop("timeout", None)
+    if timeout == 0:
+        timeout = None
     kwargs.pop("hashing_kv", None)
     api_key = kwargs.pop("api_key", None)
     headers = {
```
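The Ollama binding adds one convention the others lack: a timeout of 0 is coerced to None, meaning "disable the deadline" rather than "fail immediately". A minimal sketch of that normalization:

```python
# Sketch of the 0-means-disabled rule from the diff above. The ollama
# client is httpx-based, where a timeout of None disables deadlines.
def normalize_timeout(timeout: float | None) -> float | None:
    return None if timeout == 0 else timeout

assert normalize_timeout(0) is None    # LLM_TIMEOUT=0 disables the timeout
assert normalize_timeout(150) == 150   # any other value passes through
assert normalize_timeout(None) is None
```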
lightrag/llm/openai.py:

```diff
@@ -149,17 +149,18 @@ async def openai_complete_if_cache(
     if not VERBOSE_DEBUG and logger.level == logging.DEBUG:
         logging.getLogger("openai").setLevel(logging.INFO)

+    # Remove special kwargs that shouldn't be passed to OpenAI
+    kwargs.pop("hashing_kv", None)
+    kwargs.pop("keyword_extraction", None)
+
     # Extract client configuration options
     client_configs = kwargs.pop("openai_client_configs", {})

     # Create the OpenAI client
     openai_async_client = create_openai_async_client(
-        api_key=api_key, base_url=base_url, client_configs=client_configs
+        api_key=api_key, base_url=base_url, client_configs=client_configs,
    )

-    # Remove special kwargs that shouldn't be passed to OpenAI
-    kwargs.pop("hashing_kv", None)
-    kwargs.pop("keyword_extraction", None)
-
     # Prepare messages
     messages: list[dict[str, Any]] = []
```
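Unlike the Anthropic and Azure paths, this binding does not pop `timeout`, so a `timeout` arriving in `kwargs` would reach the completion call itself; the OpenAI Python SDK accepts a per-request `timeout`, which has the same effect for that call as a client-level one. A sketch with placeholder key and model:

```python
import asyncio
from openai import AsyncOpenAI

async def main() -> None:
    client = AsyncOpenAI(api_key="...")  # placeholder key
    resp = await client.chat.completions.create(
        model="gpt-4o-mini",  # placeholder model
        messages=[{"role": "user", "content": "ping"}],
        timeout=150,  # per-request timeout, as a "timeout" left in kwargs implies
    )
    print(resp.choices[0].message.content)

asyncio.run(main())
```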