From 9d5603d35ea44cfdcc2d530cf5fd2bad8ffa2029 Mon Sep 17 00:00:00 2001
From: yangdx
Date: Thu, 31 Jul 2025 17:15:10 +0800
Subject: [PATCH] Set the default LLM temperature to 1.0 and centralize constant management

---
 env.example               | 12 +++++++-----
 lightrag/api/README-zh.md |  1 -
 lightrag/api/README.md    |  1 -
 lightrag/api/config.py    |  3 ++-
 lightrag/constants.py     | 11 +++++++++--
 lightrag/lightrag.py      | 15 ++++++++++++---
 lightrag/llm/lollms.py    |  2 +-
 7 files changed, 31 insertions(+), 14 deletions(-)

diff --git a/env.example b/env.example
index 699fee01..04819679 100644
--- a/env.example
+++ b/env.example
@@ -111,19 +111,21 @@ MAX_PARALLEL_INSERT=2
 #######################
 ### LLM Configuration
 #######################
-### Time out in seconds for LLM, None for infinite timeout
-TIMEOUT=240
-### Some models like o1-mini require temperature to be set to 1
-TEMPERATURE=0
+### Some models like o1-mini require temperature to be set to 1; some LLMs can fall into output loops at low temperature
+# TEMPERATURE=1.0
+
 ### LLM Binding type: openai, ollama, lollms, azure_openai
 LLM_BINDING=openai
 LLM_MODEL=gpt-4o
 LLM_BINDING_HOST=https://api.openai.com/v1
 LLM_BINDING_API_KEY=your_api_key
 
-### Set as num_ctx option for Ollama LLM (Must be larger than MAX_TOTAL_TOKENS+2000)
+### Most Common Parameters for Ollama Server
 ### see also env.ollama-binding-options.example for fine tuning ollama
+### OLLAMA_LLM_NUM_CTX must be larger than MAX_TOTAL_TOKENS + 2000
 # OLLAMA_LLM_NUM_CTX=32768
+### Timeout in seconds, None for infinite timeout
+TIMEOUT=240
 
 ### Optional for Azure
 # AZURE_OPENAI_API_VERSION=2024-08-01-preview
diff --git a/lightrag/api/README-zh.md b/lightrag/api/README-zh.md
index 12426dae..428e65d5 100644
--- a/lightrag/api/README-zh.md
+++ b/lightrag/api/README-zh.md
@@ -468,7 +468,6 @@ MAX_PARALLEL_INSERT=2
 
 ### LLM Configuration (Use valid host. For local services installed with docker, you can use host.docker.internal)
 TIMEOUT=200
-TEMPERATURE=0.0
 MAX_ASYNC=4
 
 LLM_BINDING=openai
diff --git a/lightrag/api/README.md b/lightrag/api/README.md
index e2f3ee7d..71076059 100644
--- a/lightrag/api/README.md
+++ b/lightrag/api/README.md
@@ -476,7 +476,6 @@ MAX_PARALLEL_INSERT=2
 
 ### LLM Configuration (Use valid host. For local services installed with docker, you can use host.docker.internal)
 TIMEOUT=200
-TEMPERATURE=0.0
 MAX_ASYNC=4
 
 LLM_BINDING=openai
diff --git a/lightrag/api/config.py b/lightrag/api/config.py
index 9a213281..ce8bf623 100644
--- a/lightrag/api/config.py
+++ b/lightrag/api/config.py
@@ -33,6 +33,7 @@ from lightrag.constants import (
     DEFAULT_OLLAMA_MODEL_SIZE,
     DEFAULT_OLLAMA_CREATED_AT,
     DEFAULT_OLLAMA_DIGEST,
+    DEFAULT_TEMPERATURE,
 )
 
 # use the .env that is inside the current folder
@@ -332,7 +333,7 @@ def parse_args() -> argparse.Namespace:
     args.enable_llm_cache = get_env_value("ENABLE_LLM_CACHE", True, bool)
 
     # Inject LLM temperature configuration
-    args.temperature = get_env_value("TEMPERATURE", 0.5, float)
+    args.temperature = get_env_value("TEMPERATURE", DEFAULT_TEMPERATURE, float)
 
     # Select Document loading tool (DOCLING, DEFAULT)
     args.document_loading_engine = get_env_value("DOCUMENT_LOADING_ENGINE", "DEFAULT")
diff --git a/lightrag/constants.py b/lightrag/constants.py
index 9f066ec5..e66fe0ae 100644
--- a/lightrag/constants.py
+++ b/lightrag/constants.py
@@ -8,7 +8,7 @@ consistency and makes maintenance easier.
 
 # Default values for server settings
 DEFAULT_WOKERS = 2
-DEFAULT_TIMEOUT = 150
+DEFAULT_MAX_GRAPH_NODES = 1000
 
 # Default values for extraction settings
 DEFAULT_SUMMARY_LANGUAGE = "English"  # Default language for summaries
@@ -34,16 +34,23 @@ DEFAULT_HISTORY_TURNS = 0
 DEFAULT_ENABLE_RERANK = True
 DEFAULT_MIN_RERANK_SCORE = 0.0
 
-# File path configuration for vector and graph database
+# File path configuration for vector and graph database (should not be changed; used in the Milvus schema)
 DEFAULT_MAX_FILE_PATH_LENGTH = 4090
 
+# Default temperature for LLM
+DEFAULT_TEMPERATURE = 1.0
+
 # Async configuration defaults
 DEFAULT_MAX_ASYNC = 4  # Default maximum async operations
+DEFAULT_MAX_PARALLEL_INSERT = 2  # Default maximum parallel insert operations
 
 # Embedding configuration defaults
 DEFAULT_EMBEDDING_FUNC_MAX_ASYNC = 8  # Default max async for embedding functions
 DEFAULT_EMBEDDING_BATCH_NUM = 10  # Default batch size for embedding computations
 
+# Ollama Server Timeout in seconds
+DEFAULT_TIMEOUT = 150
+
 # Logging configuration defaults
 DEFAULT_LOG_MAX_BYTES = 10485760  # Default 10MB
 DEFAULT_LOG_BACKUP_COUNT = 5  # Default 5 backups
diff --git a/lightrag/lightrag.py b/lightrag/lightrag.py
index 8974ee8b..b6a20a2f 100644
--- a/lightrag/lightrag.py
+++ b/lightrag/lightrag.py
@@ -33,6 +33,9 @@ from lightrag.constants import (
     DEFAULT_RELATED_CHUNK_NUMBER,
     DEFAULT_MIN_RERANK_SCORE,
     DEFAULT_SUMMARY_MAX_TOKENS,
+    DEFAULT_MAX_ASYNC,
+    DEFAULT_MAX_PARALLEL_INSERT,
+    DEFAULT_MAX_GRAPH_NODES,
 )
 
 from lightrag.utils import get_env_value
@@ -283,7 +286,9 @@ class LightRAG:
     )
     """Maximum number of tokens allowed per LLM response."""
 
-    llm_model_max_async: int = field(default=int(os.getenv("MAX_ASYNC", 4)))
+    llm_model_max_async: int = field(
+        default=int(os.getenv("MAX_ASYNC", DEFAULT_MAX_ASYNC))
+    )
     """Maximum number of concurrent LLM calls."""
 
     llm_model_kwargs: dict[str, Any] = field(default_factory=dict)
@@ -315,10 +320,14 @@ class LightRAG:
 
     # Extensions
     # ---
-    max_parallel_insert: int = field(default=int(os.getenv("MAX_PARALLEL_INSERT", 2)))
+    max_parallel_insert: int = field(
+        default=int(os.getenv("MAX_PARALLEL_INSERT", DEFAULT_MAX_PARALLEL_INSERT))
+    )
     """Maximum number of parallel insert operations."""
 
-    max_graph_nodes: int = field(default=get_env_value("MAX_GRAPH_NODES", 1000, int))
+    max_graph_nodes: int = field(
+        default=get_env_value("MAX_GRAPH_NODES", DEFAULT_MAX_GRAPH_NODES, int)
+    )
     """Maximum number of graph nodes to return in knowledge graph queries."""
 
     addon_params: dict[str, Any] = field(
diff --git a/lightrag/llm/lollms.py b/lightrag/llm/lollms.py
index 2e51d7d3..357b65bf 100644
--- a/lightrag/llm/lollms.py
+++ b/lightrag/llm/lollms.py
@@ -59,7 +59,7 @@ async def lollms_model_if_cache(
         "personality": kwargs.get("personality", -1),
         "n_predict": kwargs.get("n_predict", None),
         "stream": stream,
-        "temperature": kwargs.get("temperature", 0.1),
+        "temperature": kwargs.get("temperature", 0.8),
         "top_k": kwargs.get("top_k", 50),
         "top_p": kwargs.get("top_p", 0.95),
         "repeat_penalty": kwargs.get("repeat_penalty", 0.8),
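
Below is a minimal illustrative sketch (not part of the diff above) of how the new centralized default is expected to be resolved at runtime; it mirrors the get_env_value("TEMPERATURE", DEFAULT_TEMPERATURE, float) call added to lightrag/api/config.py. The resolve_temperature helper and the llm_kwargs dict are hypothetical names used only for this example.

import os

from lightrag.constants import DEFAULT_TEMPERATURE  # 1.0 as of this patch


def resolve_temperature() -> float:
    """Return the LLM temperature: the TEMPERATURE env var wins, else the shared constant."""
    raw = os.getenv("TEMPERATURE")
    if raw is None or not raw.strip():
        return DEFAULT_TEMPERATURE
    try:
        return float(raw)
    except ValueError:
        # A malformed value falls back to the centralized default instead of crashing
        return DEFAULT_TEMPERATURE


# Pass the resolved value into per-call LLM kwargs, the same way config.py
# injects args.temperature into the server's LLM configuration.
llm_kwargs = {"temperature": resolve_temperature()}
print(llm_kwargs)  # {'temperature': 1.0} when TEMPERATURE is unset

Overriding remains a one-line change in .env (for example TEMPERATURE=0.7), so per-deployment tuning stays out of the code while the fallback lives in a single constant.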