Refactor LLM temperature handling to be provider-specific

• Remove global temperature parameter
• Add provider-specific temperature configs
• Update env example with new settings
• Fix Bedrock temperature handling
• Clean up splash screen display
yangdx 2025-08-20 23:52:33 +08:00
parent df7bcb1e3d
commit aa22772721
9 changed files with 20 additions and 54 deletions

View file

@@ -127,9 +127,7 @@ MAX_PARALLEL_INSERT=2
### LLM Configuration
### LLM_BINDING type: openai, ollama, lollms, azure_openai, aws_bedrock
###########################################################
### LLM temperature and timeout setting for all llm binding (openai, azure_openai, ollama)
# TEMPERATURE=1.0
### LLM request timeout setting for all llm (set to TIMEOUT if not specified)
### LLM request timeout setting for all llm (set to TIMEOUT if not specified, 0 means no timeout for Ollma)
# LLM_TIMEOUT=150
### Some models like o1-mini require temperature to be set to 1, some LLM can fall into output loops with low temperature
@@ -151,6 +149,7 @@ LLM_BINDING_API_KEY=your_api_key
### OpenAI Specific Parameters
### Apply frequency penalty to prevent the LLM from generating repetitive or looping outputs
# OPENAI_LLM_FREQUENCY_PENALTY=1.1
# OPENAI_LLM_TEMPERATURE=1.0
### use the following command to see all support options for openai and azure_openai
### lightrag-server --llm-binding openai --help
@@ -164,6 +163,9 @@ OLLAMA_LLM_NUM_CTX=32768
### use the following command to see all support options for Ollama LLM
### lightrag-server --llm-binding ollama --help
### Bedrock Specific Parameters
# BEDROCK_LLM_TEMPERATURE=1.0
####################################################################################
### Embedding Configuration (Should not be changed after the first file processed)
####################################################################################
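Taken together, the settings above replace the removed global TEMPERATURE: each binding now reads its own variable. An illustrative .env fragment (the 1.0 values mirror the defaults shown above; OLLAMA_LLM_TEMPERATURE is the pre-existing Ollama setting referenced later in this commit and is listed only for completeness):

    ### Per-binding temperature settings (replacing the removed TEMPERATURE)
    # OPENAI_LLM_TEMPERATURE=1.0     # openai / azure_openai binding
    # OLLAMA_LLM_TEMPERATURE=1.0     # ollama binding
    # BEDROCK_LLM_TEMPERATURE=1.0    # aws_bedrock binding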

View file

@@ -35,7 +35,6 @@ from lightrag.constants import (
DEFAULT_EMBEDDING_BATCH_NUM,
DEFAULT_OLLAMA_MODEL_NAME,
DEFAULT_OLLAMA_MODEL_TAG,
DEFAULT_TEMPERATURE,
)
# use the .env that is inside the current folder
@@ -264,14 +263,6 @@ def parse_args() -> argparse.Namespace:
elif os.environ.get("LLM_BINDING") in ["openai", "azure_openai"]:
OpenAILLMOptions.add_args(parser)
# Add global temperature command line argument
parser.add_argument(
"--temperature",
type=float,
default=get_env_value("TEMPERATURE", DEFAULT_TEMPERATURE, float),
help="Global temperature setting for LLM (default: from env TEMPERATURE or 0.1)",
)
args = parser.parse_args()
# convert relative path to absolute path
@@ -330,32 +321,6 @@ def parse_args() -> argparse.Namespace:
)
args.enable_llm_cache = get_env_value("ENABLE_LLM_CACHE", True, bool)
# Handle Ollama LLM temperature with priority cascade when llm-binding is ollama
if args.llm_binding == "ollama":
# Priority order (highest to lowest):
# 1. --ollama-llm-temperature command argument
# 2. OLLAMA_LLM_TEMPERATURE environment variable
# 3. --temperature command argument
# 4. TEMPERATURE environment variable
# Check if --ollama-llm-temperature was explicitly provided in command line
if "--ollama-llm-temperature" not in sys.argv:
# Use args.temperature which handles --temperature command arg and TEMPERATURE env var priority
args.ollama_llm_temperature = args.temperature
# Handle OpenAI LLM temperature with priority cascade when llm-binding is openai or azure_openai
if args.llm_binding in ["openai", "azure_openai"]:
# Priority order (highest to lowest):
# 1. --openai-llm-temperature command argument
# 2. OPENAI_LLM_TEMPERATURE environment variable
# 3. --temperature command argument
# 4. TEMPERATURE environment variable
# Check if --openai-llm-temperature was explicitly provided in command line
if "--openai-llm-temperature" not in sys.argv:
# Use args.temperature which handles --temperature command arg and TEMPERATURE env var priority
args.openai_llm_temperature = args.temperature
# Select Document loading tool (DOCLING, DEFAULT)
args.document_loading_engine = get_env_value("DOCUMENT_LOADING_ENGINE", "DEFAULT")
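With the global --temperature flag and the priority cascades gone, each binding is expected to source its temperature from its own option. A sketch of what such a binding-specific option can look like, reusing the flag and env names from the removed cascade above and the parser/get_env_value already present in this file; the actual OpenAILLMOptions/OllamaLLMOptions definitions are not part of this diff, so this is an assumption, not the project's code:

    # Sketch only: a binding-specific temperature option, mirroring the
    # structure of the removed global --temperature argument above.
    parser.add_argument(
        "--openai-llm-temperature",
        type=float,
        default=get_env_value("OPENAI_LLM_TEMPERATURE", 1.0, float),
        help="Temperature for the openai/azure_openai binding (env: OPENAI_LLM_TEMPERATURE)",
    )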

View file

@@ -327,7 +327,7 @@ def create_app(args):
history_messages = []
# Use global temperature for Bedrock
kwargs["temperature"] = args.temperature
kwargs["temperature"] = get_env_value("BEDROCK_LLM_TEMPERATURE", 1.0, float)
return await bedrock_complete_if_cache(
args.llm_model,
@@ -479,9 +479,6 @@ def create_app(args):
llm_model_func=azure_openai_model_complete,
chunk_token_size=int(args.chunk_size),
chunk_overlap_token_size=int(args.chunk_overlap_size),
llm_model_kwargs={
"timeout": llm_timeout,
},
llm_model_name=args.llm_model,
llm_model_max_async=args.max_async,
summary_max_tokens=args.max_tokens,
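The Bedrock path now resolves its temperature at call time from BEDROCK_LLM_TEMPERATURE (default 1.0) instead of the removed args.temperature. A self-contained equivalent of that lookup, for illustration only; get_env_value in the diff is LightRAG's own helper, the plain-os version below is a sketch:

    import os

    def bedrock_temperature(default: float = 1.0) -> float:
        # Standalone mirror of get_env_value("BEDROCK_LLM_TEMPERATURE", 1.0, float) above.
        raw = os.environ.get("BEDROCK_LLM_TEMPERATURE")
        try:
            return default if raw is None else float(raw)
        except ValueError:
            return default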

View file

@@ -201,6 +201,8 @@ def display_splash_screen(args: argparse.Namespace) -> None:
ASCIIColors.yellow(f"{args.port}")
ASCIIColors.white(" ├─ Workers: ", end="")
ASCIIColors.yellow(f"{args.workers}")
ASCIIColors.white(" ├─ Timeout: ", end="")
ASCIIColors.yellow(f"{args.timeout}")
ASCIIColors.white(" ├─ CORS Origins: ", end="")
ASCIIColors.yellow(f"{args.cors_origins}")
ASCIIColors.white(" ├─ SSL Enabled: ", end="")
@@ -238,14 +240,10 @@ def display_splash_screen(args: argparse.Namespace) -> None:
ASCIIColors.yellow(f"{args.llm_binding_host}")
ASCIIColors.white(" ├─ Model: ", end="")
ASCIIColors.yellow(f"{args.llm_model}")
ASCIIColors.white(" ├─ Temperature: ", end="")
ASCIIColors.yellow(f"{args.temperature}")
ASCIIColors.white(" ├─ Max Async for LLM: ", end="")
ASCIIColors.yellow(f"{args.max_async}")
ASCIIColors.white(" ├─ Max Tokens: ", end="")
ASCIIColors.yellow(f"{args.max_tokens}")
ASCIIColors.white(" ├─ Timeout: ", end="")
ASCIIColors.yellow(f"{args.timeout if args.timeout else 'None (infinite)'}")
ASCIIColors.white(" ├─ LLM Cache Enabled: ", end="")
ASCIIColors.yellow(f"{args.enable_llm_cache}")
ASCIIColors.white(" └─ LLM Cache for Extraction Enabled: ", end="")

View file

@@ -36,7 +36,6 @@ async def llm_model_func(prompt, system_prompt=None, history_messages=[], **kwar
llm_instance = OpenAI(
model="gpt-4",
api_key="your-openai-key",
temperature=0.7,
)
kwargs['llm_instance'] = llm_instance
@@ -91,7 +90,6 @@ async def llm_model_func(prompt, system_prompt=None, history_messages=[], **kwar
model=f"openai/{settings.LLM_MODEL}", # Format: "provider/model_name"
api_base=settings.LITELLM_URL,
api_key=settings.LITELLM_KEY,
temperature=0.7,
)
kwargs['llm_instance'] = llm_instance
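Both examples above now rely on the provider's own default sampling temperature. If a fixed value is still wanted for the example client, it can be passed explicitly; a hedged sketch, where the import path assumes a recent llama-index layout and 0.7 is an arbitrary illustration, not a project default:

    from llama_index.llms.openai import OpenAI  # import path assumed; not shown in the diff

    llm_instance = OpenAI(
        model="gpt-4",
        api_key="your-openai-key",
        temperature=0.7,  # explicit, caller-chosen value; omit to keep the provider default
    )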

View file

@@ -82,10 +82,15 @@ async def anthropic_complete_if_cache(
timeout = kwargs.pop("timeout", None)
anthropic_async_client = (
AsyncAnthropic(default_headers=default_headers, api_key=api_key, timeout=timeout)
AsyncAnthropic(
default_headers=default_headers, api_key=api_key, timeout=timeout
)
if base_url is None
else AsyncAnthropic(
base_url=base_url, default_headers=default_headers, api_key=api_key, timeout=timeout
base_url=base_url,
default_headers=default_headers,
api_key=api_key,
timeout=timeout,
)
)

View file

@@ -62,7 +62,7 @@ async def azure_openai_complete_if_cache(
kwargs.pop("hashing_kv", None)
kwargs.pop("keyword_extraction", None)
timeout = kwargs.pop("timeout", None)
openai_async_client = AsyncAzureOpenAI(
azure_endpoint=base_url,
azure_deployment=deployment,

View file

@@ -59,7 +59,7 @@ async def lollms_model_if_cache(
"personality": kwargs.get("personality", -1),
"n_predict": kwargs.get("n_predict", None),
"stream": stream,
"temperature": kwargs.get("temperature", 0.8),
"temperature": kwargs.get("temperature", 1.0),
"top_k": kwargs.get("top_k", 50),
"top_p": kwargs.get("top_p", 0.95),
"repeat_penalty": kwargs.get("repeat_penalty", 0.8),

View file

@@ -158,10 +158,11 @@ async def openai_complete_if_cache(
# Create the OpenAI client
openai_async_client = create_openai_async_client(
api_key=api_key, base_url=base_url, client_configs=client_configs,
api_key=api_key,
base_url=base_url,
client_configs=client_configs,
)
# Prepare messages
messages: list[dict[str, Any]] = []
if system_prompt: