Add LLM_TIMEOUT configuration for all LLM providers
- Add LLM_TIMEOUT env variable
- Apply timeout to all LLM bindings
This commit is contained in:
parent
4c556d8aae
commit
df7bcb1e3d
6 changed files with 34 additions and 23 deletions
|
|
@ -127,8 +127,10 @@ MAX_PARALLEL_INSERT=2
|
||||||
### LLM Configuration
|
### LLM Configuration
|
||||||
### LLM_BINDING type: openai, ollama, lollms, azure_openai, aws_bedrock
|
### LLM_BINDING type: openai, ollama, lollms, azure_openai, aws_bedrock
|
||||||
###########################################################
|
###########################################################
|
||||||
### LLM temperature setting for all llm binding (openai, azure_openai, ollama)
|
### LLM temperature and timeout settings for all LLM bindings (openai, azure_openai, ollama)
|
||||||
# TEMPERATURE=1.0
|
# TEMPERATURE=1.0
|
||||||
|
### LLM request timeout setting for all LLM bindings (falls back to TIMEOUT if not specified)
|
||||||
|
# LLM_TIMEOUT=150
|
||||||
### Some models like o1-mini require temperature to be set to 1, some LLM can fall into output loops with low temperature
|
### Some models like o1-mini require temperature to be set to 1, some LLM can fall into output loops with low temperature
|
||||||
|
|
||||||
LLM_BINDING=openai
|
LLM_BINDING=openai
|
||||||
|
|
|
||||||
|
|
@ -254,6 +254,8 @@ def create_app(args):
|
||||||
if args.embedding_binding == "jina":
|
if args.embedding_binding == "jina":
|
||||||
from lightrag.llm.jina import jina_embed
|
from lightrag.llm.jina import jina_embed
|
||||||
|
|
||||||
|
llm_timeout = get_env_value("LLM_TIMEOUT", args.timeout, int)
|
||||||
|
|
||||||
async def openai_alike_model_complete(
|
async def openai_alike_model_complete(
|
||||||
prompt,
|
prompt,
|
||||||
system_prompt=None,
|
system_prompt=None,
|
||||||
|
|
@ -267,12 +269,10 @@ def create_app(args):
|
||||||
if history_messages is None:
|
if history_messages is None:
|
||||||
history_messages = []
|
history_messages = []
|
||||||
|
|
||||||
# Use OpenAI LLM options if available, otherwise fallback to global temperature
|
# Use OpenAI LLM options if available
|
||||||
if args.llm_binding == "openai":
|
openai_options = OpenAILLMOptions.options_dict(args)
|
||||||
openai_options = OpenAILLMOptions.options_dict(args)
|
kwargs["timeout"] = llm_timeout
|
||||||
kwargs.update(openai_options)
|
kwargs.update(openai_options)
|
||||||
else:
|
|
||||||
kwargs["temperature"] = args.temperature
|
|
||||||
|
|
||||||
return await openai_complete_if_cache(
|
return await openai_complete_if_cache(
|
||||||
args.llm_model,
|
args.llm_model,
|
||||||
|
|
@ -297,12 +297,10 @@ def create_app(args):
|
||||||
if history_messages is None:
|
if history_messages is None:
|
||||||
history_messages = []
|
history_messages = []
|
||||||
|
|
||||||
# Use OpenAI LLM options if available, otherwise fallback to global temperature
|
# Use OpenAI LLM options
|
||||||
if args.llm_binding == "azure_openai":
|
openai_options = OpenAILLMOptions.options_dict(args)
|
||||||
openai_options = OpenAILLMOptions.options_dict(args)
|
kwargs["timeout"] = llm_timeout
|
||||||
kwargs.update(openai_options)
|
kwargs.update(openai_options)
|
||||||
else:
|
|
||||||
kwargs["temperature"] = args.temperature
|
|
||||||
|
|
||||||
return await azure_openai_complete_if_cache(
|
return await azure_openai_complete_if_cache(
|
||||||
args.llm_model,
|
args.llm_model,
|
||||||
|
|
@ -451,7 +449,7 @@ def create_app(args):
|
||||||
llm_model_kwargs=(
|
llm_model_kwargs=(
|
||||||
{
|
{
|
||||||
"host": args.llm_binding_host,
|
"host": args.llm_binding_host,
|
||||||
"timeout": args.timeout,
|
"timeout": llm_timeout,
|
||||||
"options": OllamaLLMOptions.options_dict(args),
|
"options": OllamaLLMOptions.options_dict(args),
|
||||||
"api_key": args.llm_binding_api_key,
|
"api_key": args.llm_binding_api_key,
|
||||||
}
|
}
|
||||||
|
|
@ -482,7 +480,7 @@ def create_app(args):
|
||||||
chunk_token_size=int(args.chunk_size),
|
chunk_token_size=int(args.chunk_size),
|
||||||
chunk_overlap_token_size=int(args.chunk_overlap_size),
|
chunk_overlap_token_size=int(args.chunk_overlap_size),
|
||||||
llm_model_kwargs={
|
llm_model_kwargs={
|
||||||
"timeout": args.timeout,
|
"timeout": llm_timeout,
|
||||||
},
|
},
|
||||||
llm_model_name=args.llm_model,
|
llm_model_name=args.llm_model,
|
||||||
llm_model_max_async=args.max_async,
|
llm_model_max_async=args.max_async,
|
||||||
|
|
|
||||||
|
|
@ -77,14 +77,18 @@ async def anthropic_complete_if_cache(
|
||||||
if not VERBOSE_DEBUG and logger.level == logging.DEBUG:
|
if not VERBOSE_DEBUG and logger.level == logging.DEBUG:
|
||||||
logging.getLogger("anthropic").setLevel(logging.INFO)
|
logging.getLogger("anthropic").setLevel(logging.INFO)
|
||||||
|
|
||||||
|
kwargs.pop("hashing_kv", None)
|
||||||
|
kwargs.pop("keyword_extraction", None)
|
||||||
|
timeout = kwargs.pop("timeout", None)
|
||||||
|
|
||||||
anthropic_async_client = (
|
anthropic_async_client = (
|
||||||
AsyncAnthropic(default_headers=default_headers, api_key=api_key)
|
AsyncAnthropic(default_headers=default_headers, api_key=api_key, timeout=timeout)
|
||||||
if base_url is None
|
if base_url is None
|
||||||
else AsyncAnthropic(
|
else AsyncAnthropic(
|
||||||
base_url=base_url, default_headers=default_headers, api_key=api_key
|
base_url=base_url, default_headers=default_headers, api_key=api_key, timeout=timeout
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
kwargs.pop("hashing_kv", None)
|
|
||||||
messages: list[dict[str, Any]] = []
|
messages: list[dict[str, Any]] = []
|
||||||
if system_prompt:
|
if system_prompt:
|
||||||
messages.append({"role": "system", "content": system_prompt})
|
messages.append({"role": "system", "content": system_prompt})
|
||||||
|
|
|
||||||
|
|
@ -59,13 +59,17 @@ async def azure_openai_complete_if_cache(
|
||||||
or os.getenv("OPENAI_API_VERSION")
|
or os.getenv("OPENAI_API_VERSION")
|
||||||
)
|
)
|
||||||
|
|
||||||
|
kwargs.pop("hashing_kv", None)
|
||||||
|
kwargs.pop("keyword_extraction", None)
|
||||||
|
timeout = kwargs.pop("timeout", None)
|
||||||
|
|
||||||
openai_async_client = AsyncAzureOpenAI(
|
openai_async_client = AsyncAzureOpenAI(
|
||||||
azure_endpoint=base_url,
|
azure_endpoint=base_url,
|
||||||
azure_deployment=deployment,
|
azure_deployment=deployment,
|
||||||
api_key=api_key,
|
api_key=api_key,
|
||||||
api_version=api_version,
|
api_version=api_version,
|
||||||
|
timeout=timeout,
|
||||||
)
|
)
|
||||||
kwargs.pop("hashing_kv", None)
|
|
||||||
messages = []
|
messages = []
|
||||||
if system_prompt:
|
if system_prompt:
|
||||||
messages.append({"role": "system", "content": system_prompt})
|
messages.append({"role": "system", "content": system_prompt})
|
||||||
|
|
|
||||||
|
|
@ -51,6 +51,8 @@ async def _ollama_model_if_cache(
|
||||||
# kwargs.pop("response_format", None) # allow json
|
# kwargs.pop("response_format", None) # allow json
|
||||||
host = kwargs.pop("host", None)
|
host = kwargs.pop("host", None)
|
||||||
timeout = kwargs.pop("timeout", None)
|
timeout = kwargs.pop("timeout", None)
|
||||||
|
if timeout == 0:
|
||||||
|
timeout = None
|
||||||
kwargs.pop("hashing_kv", None)
|
kwargs.pop("hashing_kv", None)
|
||||||
api_key = kwargs.pop("api_key", None)
|
api_key = kwargs.pop("api_key", None)
|
||||||
headers = {
|
headers = {
|
||||||
|
|
|
||||||
|
|
@ -149,17 +149,18 @@ async def openai_complete_if_cache(
|
||||||
if not VERBOSE_DEBUG and logger.level == logging.DEBUG:
|
if not VERBOSE_DEBUG and logger.level == logging.DEBUG:
|
||||||
logging.getLogger("openai").setLevel(logging.INFO)
|
logging.getLogger("openai").setLevel(logging.INFO)
|
||||||
|
|
||||||
|
# Remove special kwargs that shouldn't be passed to OpenAI
|
||||||
|
kwargs.pop("hashing_kv", None)
|
||||||
|
kwargs.pop("keyword_extraction", None)
|
||||||
|
|
||||||
# Extract client configuration options
|
# Extract client configuration options
|
||||||
client_configs = kwargs.pop("openai_client_configs", {})
|
client_configs = kwargs.pop("openai_client_configs", {})
|
||||||
|
|
||||||
# Create the OpenAI client
|
# Create the OpenAI client
|
||||||
openai_async_client = create_openai_async_client(
|
openai_async_client = create_openai_async_client(
|
||||||
api_key=api_key, base_url=base_url, client_configs=client_configs
|
api_key=api_key, base_url=base_url, client_configs=client_configs,
|
||||||
)
|
)
|
||||||
|
|
||||||
# Remove special kwargs that shouldn't be passed to OpenAI
|
|
||||||
kwargs.pop("hashing_kv", None)
|
|
||||||
kwargs.pop("keyword_extraction", None)
|
|
||||||
|
|
||||||
# Prepare messages
|
# Prepare messages
|
||||||
messages: list[dict[str, Any]] = []
|
messages: list[dict[str, Any]] = []
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue