Set the default LLM temperature to 1.0 and centralize constant management
parent 3c530b21b6
commit 9d5603d35e
7 changed files with 31 additions and 14 deletions
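
The change follows one pattern throughout: literal defaults that were repeated at call sites (0.5 for temperature, 4 for MAX_ASYNC, 2 for MAX_PARALLEL_INSERT, 1000 for MAX_GRAPH_NODES) become named constants in lightrag.constants, while the environment variable keeps precedence at each call site. A minimal before/after sketch of the idea; the snippet below is illustrative only and defines the constant locally instead of importing the real module:

    import os

    # Before: the literal default is repeated at each call site
    temperature = float(os.getenv("TEMPERATURE", 0.5))

    # After: the default lives in one place (lightrag/constants.py in this commit)
    DEFAULT_TEMPERATURE = 1.0  # stand-in for `from lightrag.constants import DEFAULT_TEMPERATURE`
    temperature = float(os.getenv("TEMPERATURE", DEFAULT_TEMPERATURE))
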
env.example (12 changes)
@@ -111,19 +111,21 @@ MAX_PARALLEL_INSERT=2
 #######################
 ### LLM Configuration
 #######################
-### Time out in seconds for LLM, None for infinite timeout
-TIMEOUT=240
-### Some models like o1-mini require temperature to be set to 1
-TEMPERATURE=0
+### Some models like o1-mini require temperature to be set to 1, some LLM can fall into output loops with low temperature
+# TEMPERATURE=1.0
+
 ### LLM Binding type: openai, ollama, lollms, azure_openai
 LLM_BINDING=openai
 LLM_MODEL=gpt-4o
 LLM_BINDING_HOST=https://api.openai.com/v1
 LLM_BINDING_API_KEY=your_api_key

-### Set as num_ctx option for Ollama LLM (Must be larger than MAX_TOTAL_TOKENS+2000)
+### Most Commont Parameters for Ollama Server
 ### see also env.ollama-binding-options.example for fine tuning ollama
+### OLLAMA_LLM_NUM_CTX must be larger than MAX_TOTAL_TOKENS + 2000
 # OLLAMA_LLM_NUM_CTX=32768
+### Time out in seconds, None for infinite timeout
+TIMEOUT=240

 ### Optional for Azure
 # AZURE_OPENAI_API_VERSION=2024-08-01-preview

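With TEMPERATURE left commented out, deployments now inherit the built-in default of 1.0; uncommenting it (or exporting the variable) still overrides it. The reorganized Ollama block also keeps the sizing rule from the replaced comment: the context window handed to Ollama must exceed the token budget by a margin. A hedged sketch of that constraint, where the 30000 token budget is a placeholder and the validation helper is illustrative rather than part of the commit:

    import os

    def check_ollama_ctx(max_total_tokens: int = 30000) -> int:
        # OLLAMA_LLM_NUM_CTX must be larger than MAX_TOTAL_TOKENS + 2000
        num_ctx = int(os.getenv("OLLAMA_LLM_NUM_CTX", "32768"))
        if num_ctx <= max_total_tokens + 2000:
            raise ValueError(
                f"OLLAMA_LLM_NUM_CTX={num_ctx} must exceed {max_total_tokens + 2000}"
            )
        return num_ctx
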
@@ -468,7 +468,6 @@ MAX_PARALLEL_INSERT=2

 ### LLM Configuration (Use valid host. For local services installed with docker, you can use host.docker.internal)
 TIMEOUT=200
-TEMPERATURE=0.0
 MAX_ASYNC=4

 LLM_BINDING=openai

@@ -476,7 +476,6 @@ MAX_PARALLEL_INSERT=2

 ### LLM Configuration (Use valid host. For local services installed with docker, you can use host.docker.internal)
 TIMEOUT=200
-TEMPERATURE=0.0
 MAX_ASYNC=4

 LLM_BINDING=openai

@@ -33,6 +33,7 @@ from lightrag.constants import (
     DEFAULT_OLLAMA_MODEL_SIZE,
     DEFAULT_OLLAMA_CREATED_AT,
     DEFAULT_OLLAMA_DIGEST,
+    DEFAULT_TEMPERATURE,
 )

 # use the .env that is inside the current folder

@@ -332,7 +333,7 @@ def parse_args() -> argparse.Namespace:
     args.enable_llm_cache = get_env_value("ENABLE_LLM_CACHE", True, bool)

     # Inject LLM temperature configuration
-    args.temperature = get_env_value("TEMPERATURE", 0.5, float)
+    args.temperature = get_env_value("TEMPERATURE", DEFAULT_TEMPERATURE, float)

     # Select Document loading tool (DOCLING, DEFAULT)
     args.document_loading_engine = get_env_value("DOCUMENT_LOADING_ENGINE", "DEFAULT")

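The hard-coded 0.5 fallback is replaced by the shared constant. The call shape of get_env_value (variable name, default, cast) is visible in the diff; the stand-in below shows how that shape behaves, though it is a simplification and the real helper in lightrag.utils may handle booleans and other cases differently:

    import os
    from typing import Any, Callable

    DEFAULT_TEMPERATURE = 1.0  # mirrors the new constant

    def get_env_value(name: str, default: Any, cast: Callable[[str], Any] = str) -> Any:
        # Simplified stand-in for lightrag.utils.get_env_value
        raw = os.getenv(name)
        return cast(raw) if raw is not None else default

    temperature = get_env_value("TEMPERATURE", DEFAULT_TEMPERATURE, float)
    print(temperature)  # 1.0 unless TEMPERATURE is set in the environment
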
@@ -8,7 +8,7 @@ consistency and makes maintenance easier.

 # Default values for server settings
 DEFAULT_WOKERS = 2
-DEFAULT_TIMEOUT = 150
+DEFAULT_MAX_GRAPH_NODES = 1000

 # Default values for extraction settings
 DEFAULT_SUMMARY_LANGUAGE = "English"  # Default language for summaries

@@ -34,16 +34,23 @@ DEFAULT_HISTORY_TURNS = 0
 DEFAULT_ENABLE_RERANK = True
 DEFAULT_MIN_RERANK_SCORE = 0.0

-# File path configuration for vector and graph database
+# File path configuration for vector and graph database(Should not be changed, used in Milvus Schema)
 DEFAULT_MAX_FILE_PATH_LENGTH = 4090

+# Default temperature for LLM
+DEFAULT_TEMPERATURE = 1.0
+
 # Async configuration defaults
 DEFAULT_MAX_ASYNC = 4  # Default maximum async operations
+DEFAULT_MAX_PARALLEL_INSERT = 2  # Default maximum parallel insert operations

 # Embedding configuration defaults
 DEFAULT_EMBEDDING_FUNC_MAX_ASYNC = 8  # Default max async for embedding functions
 DEFAULT_EMBEDDING_BATCH_NUM = 10  # Default batch size for embedding computations

+# Ollama Server Timetout in seconds
+DEFAULT_TIMEOUT = 150
+
 # Logging configuration defaults
 DEFAULT_LOG_MAX_BYTES = 10485760  # Default 10MB
 DEFAULT_LOG_BACKUP_COUNT = 5  # Default 5 backups

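After this hunk the temperature, concurrency, and graph-size defaults all have named homes alongside the existing constants, and DEFAULT_TIMEOUT reappears lower in the file as the Ollama server timeout. A caller-side sketch, assuming the lightrag package is importable; the inline value comments simply restate the constants defined in this diff:

    from lightrag.constants import (
        DEFAULT_TEMPERATURE,          # 1.0, new in this commit
        DEFAULT_MAX_ASYNC,            # 4
        DEFAULT_MAX_PARALLEL_INSERT,  # 2
        DEFAULT_MAX_GRAPH_NODES,      # 1000
    )

    print(DEFAULT_TEMPERATURE, DEFAULT_MAX_ASYNC, DEFAULT_MAX_PARALLEL_INSERT, DEFAULT_MAX_GRAPH_NODES)
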
@@ -33,6 +33,9 @@ from lightrag.constants import (
     DEFAULT_RELATED_CHUNK_NUMBER,
     DEFAULT_MIN_RERANK_SCORE,
     DEFAULT_SUMMARY_MAX_TOKENS,
+    DEFAULT_MAX_ASYNC,
+    DEFAULT_MAX_PARALLEL_INSERT,
+    DEFAULT_MAX_GRAPH_NODES,
 )
 from lightrag.utils import get_env_value

@@ -283,7 +286,9 @@ class LightRAG:
     )
     """Maximum number of tokens allowed per LLM response."""

-    llm_model_max_async: int = field(default=int(os.getenv("MAX_ASYNC", 4)))
+    llm_model_max_async: int = field(
+        default=int(os.getenv("MAX_ASYNC", DEFAULT_MAX_ASYNC))
+    )
     """Maximum number of concurrent LLM calls."""

     llm_model_kwargs: dict[str, Any] = field(default_factory=dict)

@@ -315,10 +320,14 @@ class LightRAG:
     # Extensions
     # ---

-    max_parallel_insert: int = field(default=int(os.getenv("MAX_PARALLEL_INSERT", 2)))
+    max_parallel_insert: int = field(
+        default=int(os.getenv("MAX_PARALLEL_INSERT", DEFAULT_MAX_PARALLEL_INSERT))
+    )
     """Maximum number of parallel insert operations."""

-    max_graph_nodes: int = field(default=get_env_value("MAX_GRAPH_NODES", 1000, int))
+    max_graph_nodes: int = field(
+        default=get_env_value("MAX_GRAPH_NODES", DEFAULT_MAX_GRAPH_NODES, int)
+    )
     """Maximum number of graph nodes to return in knowledge graph queries."""

     addon_params: dict[str, Any] = field(

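The reformatted fields keep the same semantics: each default is computed once, when the class body is executed at import time, with the environment variable taking precedence over the named constant. A self-contained sketch of that pattern; the Config class here is illustrative, not the real LightRAG dataclass:

    import os
    from dataclasses import dataclass, field

    DEFAULT_MAX_ASYNC = 4  # mirrors lightrag.constants

    @dataclass
    class Config:
        # Evaluated when the class body runs, not per instance
        llm_model_max_async: int = field(
            default=int(os.getenv("MAX_ASYNC", DEFAULT_MAX_ASYNC))
        )

    print(Config().llm_model_max_async)  # 4 unless MAX_ASYNC was set before import
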
@@ -59,7 +59,7 @@ async def lollms_model_if_cache(
         "personality": kwargs.get("personality", -1),
         "n_predict": kwargs.get("n_predict", None),
         "stream": stream,
-        "temperature": kwargs.get("temperature", 0.1),
+        "temperature": kwargs.get("temperature", 0.8),
         "top_k": kwargs.get("top_k", 50),
         "top_p": kwargs.get("top_p", 0.95),
         "repeat_penalty": kwargs.get("repeat_penalty", 0.8),

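Raising the in-request fallback from 0.1 to 0.8 only matters when the caller passes no temperature at all; an explicit value in kwargs still wins. A minimal sketch of that precedence, not the full LoLLMs request payload:

    def build_request(**kwargs) -> dict:
        # Caller-supplied value wins; 0.8 is only the last-resort fallback
        return {"temperature": kwargs.get("temperature", 0.8)}

    print(build_request())                 # {'temperature': 0.8}
    print(build_request(temperature=1.0))  # {'temperature': 1.0}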